]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // When a load/store accesses the generic address space, checks whether the | |
11 | // address is cast from a non-generic address space. If so, remove this | |
12 | // addrspacecast because accessing non-generic address spaces is typically | |
13 | // faster. Besides seeking addrspacecasts, this optimization also traces into | |
14 | // the base pointer of a GEP. | |
15 | // | |
16 | // For instance, the code below loads a float from an array allocated in | |
17 | // addrspace(3). | |
18 | // | |
19 | // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* | |
20 | // %1 = gep [10 x float]* %0, i64 0, i64 %i | |
21 | // %2 = load float* %1 ; emits ld.f32 | |
22 | // | |
23 | // First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast | |
24 | // and the GEP to expose more optimization opportunities to function | |
25 | // optimizeMemoryInstruction. The intermediate code looks like: | |
26 | // | |
27 | // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i | |
28 | // %1 = addrspacecast float addrspace(3)* %0 to float* | |
29 | // %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly | |
30 | // | |
31 | // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed | |
32 | // generic pointers, and folds the load and the addrspacecast into a load from | |
33 | // the original address space. The final code looks like: | |
34 | // | |
35 | // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i | |
36 | // %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 | |
37 | // | |
38 | // This pass may remove an addrspacecast in a different BB. Therefore, we | |
39 | // implement it as a FunctionPass. | |
40 | // | |
41 | //===----------------------------------------------------------------------===// | |
42 | ||
43 | #include "NVPTX.h" | |
44 | #include "llvm/IR/Function.h" | |
45 | #include "llvm/IR/Instructions.h" | |
46 | #include "llvm/IR/Operator.h" | |
47 | #include "llvm/Support/CommandLine.h" | |
48 | ||
49 | using namespace llvm; | |
50 | ||
51 | // An option to disable this optimization. Enable it by default. | |
52 | static cl::opt<bool> DisableFavorNonGeneric( | |
53 | "disable-nvptx-favor-non-generic", | |
54 | cl::init(false), | |
55 | cl::desc("Do not convert generic address space usage " | |
56 | "to non-generic address space usage"), | |
57 | cl::Hidden); | |
58 | ||
59 | namespace { | |
60 | /// \brief NVPTXFavorNonGenericAddrSpaces | |
61 | class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { | |
62 | public: | |
63 | static char ID; | |
64 | NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} | |
65 | ||
66 | bool runOnFunction(Function &F) override; | |
67 | ||
68 | /// Optimizes load/store instructions. Idx is the index of the pointer operand | |
69 | /// (0 for load, and 1 for store). Returns true if it changes anything. | |
70 | bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); | |
71 | /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, | |
72 | /// indices)". This reordering exposes to optimizeMemoryInstruction more | |
73 | /// optimization opportunities on loads and stores. Returns true if it changes | |
74 | /// the program. | |
75 | bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); | |
76 | }; | |
77 | } | |
78 | ||
79 | char NVPTXFavorNonGenericAddrSpaces::ID = 0; | |
80 | ||
81 | namespace llvm { | |
82 | void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); | |
83 | } | |
84 | INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", | |
85 | "Remove unnecessary non-generic-to-generic addrspacecasts", | |
86 | false, false) | |
87 | ||
88 | // Decides whether removing Cast is valid and beneficial. Cast can be an | |
89 | // instruction or a constant expression. | |
90 | static bool IsEliminableAddrSpaceCast(Operator *Cast) { | |
91 | // Returns false if not even an addrspacecast. | |
92 | if (Cast->getOpcode() != Instruction::AddrSpaceCast) | |
93 | return false; | |
94 | ||
95 | Value *Src = Cast->getOperand(0); | |
96 | PointerType *SrcTy = cast<PointerType>(Src->getType()); | |
97 | PointerType *DestTy = cast<PointerType>(Cast->getType()); | |
98 | // TODO: For now, we only handle the case where the addrspacecast only changes | |
99 | // the address space but not the type. If the type also changes, we could | |
100 | // still get rid of the addrspacecast by adding an extra bitcast, but we | |
101 | // rarely see such scenarios. | |
102 | if (SrcTy->getElementType() != DestTy->getElementType()) | |
103 | return false; | |
104 | ||
105 | // Checks whether the addrspacecast is from a non-generic address space to the | |
106 | // generic address space. | |
107 | return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC && | |
108 | DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); | |
109 | } | |
110 | ||
111 | bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( | |
112 | GEPOperator *GEP) { | |
113 | Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); | |
114 | if (!Cast) | |
115 | return false; | |
116 | ||
117 | if (!IsEliminableAddrSpaceCast(Cast)) | |
118 | return false; | |
119 | ||
120 | SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); | |
121 | if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { | |
122 | // %1 = gep (addrspacecast X), indices | |
123 | // => | |
124 | // %0 = gep X, indices | |
125 | // %1 = addrspacecast %0 | |
126 | GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), | |
127 | Indices, | |
128 | GEP->getName(), | |
129 | GEPI); | |
130 | NewGEPI->setIsInBounds(GEP->isInBounds()); | |
131 | GEP->replaceAllUsesWith( | |
132 | new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); | |
133 | } else { | |
134 | // GEP is a constant expression. | |
135 | Constant *NewGEPCE = ConstantExpr::getGetElementPtr( | |
136 | cast<Constant>(Cast->getOperand(0)), | |
137 | Indices, | |
138 | GEP->isInBounds()); | |
139 | GEP->replaceAllUsesWith( | |
140 | ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); | |
141 | } | |
142 | ||
143 | return true; | |
144 | } | |
145 | ||
146 | bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, | |
147 | unsigned Idx) { | |
148 | // If the pointer operand is a GEP, hoist the addrspacecast if any from the | |
149 | // GEP to expose more optimization opportunites. | |
150 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { | |
151 | hoistAddrSpaceCastFromGEP(GEP); | |
152 | } | |
153 | ||
154 | // load/store (addrspacecast X) => load/store X if shortcutting the | |
155 | // addrspacecast is valid and can improve performance. | |
156 | // | |
157 | // e.g., | |
158 | // %1 = addrspacecast float addrspace(3)* %0 to float* | |
159 | // %2 = load float* %1 | |
160 | // -> | |
161 | // %2 = load float addrspace(3)* %0 | |
162 | // | |
163 | // Note: the addrspacecast can also be a constant expression. | |
164 | if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { | |
165 | if (IsEliminableAddrSpaceCast(Cast)) { | |
166 | MI->setOperand(Idx, Cast->getOperand(0)); | |
167 | return true; | |
168 | } | |
169 | } | |
170 | ||
171 | return false; | |
172 | } | |
173 | ||
174 | bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { | |
175 | if (DisableFavorNonGeneric) | |
176 | return false; | |
177 | ||
178 | bool Changed = false; | |
179 | for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { | |
180 | for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { | |
181 | if (isa<LoadInst>(I)) { | |
182 | // V = load P | |
183 | Changed |= optimizeMemoryInstruction(I, 0); | |
184 | } else if (isa<StoreInst>(I)) { | |
185 | // store V, P | |
186 | Changed |= optimizeMemoryInstruction(I, 1); | |
187 | } | |
188 | } | |
189 | } | |
190 | return Changed; | |
191 | } | |
192 | ||
193 | FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() { | |
194 | return new NVPTXFavorNonGenericAddrSpaces(); | |
195 | } |