]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | /// i1 values are usually inserted by the CFG Structurize pass and they are | |
9 | /// unique in that they can be copied from VALU to SALU registers. | |
10 | /// This is not possible for any other value type. Since there are no | |
11 | /// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the i1. | |
12 | /// | |
13 | //===----------------------------------------------------------------------===// | |
14 | // | |
15 | ||
16 | #define DEBUG_TYPE "si-i1-copies" | |
17 | #include "AMDGPU.h" | |
18 | #include "AMDGPUSubtarget.h" | |
19 | #include "SIInstrInfo.h" | |
20 | #include "llvm/CodeGen/LiveIntervalAnalysis.h" | |
21 | #include "llvm/CodeGen/MachineDominators.h" | |
22 | #include "llvm/CodeGen/MachineFunctionPass.h" | |
23 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |
25 | #include "llvm/IR/LLVMContext.h" | |
26 | #include "llvm/IR/Function.h" | |
27 | #include "llvm/Support/Debug.h" | |
28 | #include "llvm/Target/TargetMachine.h" | |
29 | ||
30 | using namespace llvm; | |
31 | ||
32 | namespace { | |
33 | ||
34 | class SILowerI1Copies : public MachineFunctionPass { | |
35 | public: | |
36 | static char ID; | |
37 | ||
38 | public: | |
39 | SILowerI1Copies() : MachineFunctionPass(ID) { | |
40 | initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry()); | |
41 | } | |
42 | ||
43 | bool runOnMachineFunction(MachineFunction &MF) override; | |
44 | ||
45 | const char *getPassName() const override { | |
85aaf69f | 46 | return "SI Lower i1 Copies"; |
1a4d82fc JJ |
47 | } |
48 | ||
49 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |
50 | AU.addRequired<MachineDominatorTree>(); | |
51 | AU.setPreservesCFG(); | |
52 | MachineFunctionPass::getAnalysisUsage(AU); | |
53 | } | |
54 | }; | |
55 | ||
56 | } // End anonymous namespace. | |
57 | ||
58 | INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, | |
85aaf69f | 59 | "SI Lower i1 Copies", false, false) |
1a4d82fc JJ |
60 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
61 | INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE, | |
85aaf69f | 62 | "SI Lower i1 Copies", false, false) |
1a4d82fc JJ |
63 | |
64 | char SILowerI1Copies::ID = 0; | |
65 | ||
66 | char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID; | |
67 | ||
68 | FunctionPass *llvm::createSILowerI1CopiesPass() { | |
69 | return new SILowerI1Copies(); | |
70 | } | |
71 | ||
72 | bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { | |
73 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |
74 | const SIInstrInfo *TII = | |
75 | static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); | |
76 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); | |
77 | std::vector<unsigned> I1Defs; | |
78 | ||
79 | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); | |
80 | BI != BE; ++BI) { | |
81 | ||
82 | MachineBasicBlock &MBB = *BI; | |
83 | MachineBasicBlock::iterator I, Next; | |
84 | for (I = MBB.begin(); I != MBB.end(); I = Next) { | |
85 | Next = std::next(I); | |
86 | MachineInstr &MI = *I; | |
87 | ||
85aaf69f SL |
88 | if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) { |
89 | unsigned Reg = MI.getOperand(0).getReg(); | |
90 | const TargetRegisterClass *RC = MRI.getRegClass(Reg); | |
91 | if (RC == &AMDGPU::VReg_1RegClass) | |
92 | MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass); | |
1a4d82fc JJ |
93 | continue; |
94 | } | |
95 | ||
85aaf69f | 96 | if (MI.getOpcode() != AMDGPU::COPY) |
1a4d82fc | 97 | continue; |
1a4d82fc | 98 | |
85aaf69f SL |
99 | const MachineOperand &Dst = MI.getOperand(0); |
100 | const MachineOperand &Src = MI.getOperand(1); | |
1a4d82fc | 101 | |
85aaf69f SL |
102 | if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) || |
103 | !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) | |
1a4d82fc | 104 | continue; |
1a4d82fc | 105 | |
85aaf69f SL |
106 | const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg()); |
107 | const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg()); | |
1a4d82fc JJ |
108 | |
109 | if (DstRC == &AMDGPU::VReg_1RegClass && | |
110 | TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) { | |
85aaf69f SL |
111 | I1Defs.push_back(Dst.getReg()); |
112 | DebugLoc DL = MI.getDebugLoc(); | |
113 | ||
114 | MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg()); | |
115 | if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) { | |
116 | if (DefInst->getOperand(1).isImm()) { | |
117 | I1Defs.push_back(Dst.getReg()); | |
118 | ||
119 | int64_t Val = DefInst->getOperand(1).getImm(); | |
120 | assert(Val == 0 || Val == -1); | |
121 | ||
122 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32)) | |
123 | .addOperand(Dst) | |
124 | .addImm(Val); | |
125 | MI.eraseFromParent(); | |
126 | continue; | |
127 | } | |
128 | } | |
129 | ||
130 | BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64)) | |
131 | .addOperand(Dst) | |
132 | .addImm(0) | |
133 | .addImm(-1) | |
134 | .addOperand(Src); | |
1a4d82fc JJ |
135 | MI.eraseFromParent(); |
136 | } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && | |
137 | SrcRC == &AMDGPU::VReg_1RegClass) { | |
138 | BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64)) | |
85aaf69f SL |
139 | .addOperand(Dst) |
140 | .addOperand(Src) | |
141 | .addImm(0); | |
1a4d82fc JJ |
142 | MI.eraseFromParent(); |
143 | } | |
144 | } | |
145 | } | |
146 | ||
147 | for (unsigned Reg : I1Defs) | |
85aaf69f | 148 | MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass); |
1a4d82fc JJ |
149 | |
150 | return false; | |
151 | } |