]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements the AArch64PromoteConstant pass which promotes constants | |
11 | // to global variables when this is likely to be more efficient. Currently only | |
12 | // types related to constant vector (i.e., constant vector, array of constant | |
13 | // vectors, constant structure with a constant vector field, etc.) are promoted | |
14 | // to global variables. Constant vectors are likely to be lowered in target | |
15 | // constant pool during instruction selection already; therefore, the access | |
16 | // will remain the same (memory load), but the structure types are not split | |
17 | // into different constant pool accesses for each field. A bonus side effect is | |
18 | // that created globals may be merged by the global merge pass. | |
19 | // | |
20 | // FIXME: This pass may be useful for other targets too. | |
21 | //===----------------------------------------------------------------------===// | |
22 | ||
23 | #include "AArch64.h" | |
24 | #include "llvm/ADT/DenseMap.h" | |
25 | #include "llvm/ADT/SmallSet.h" | |
26 | #include "llvm/ADT/SmallVector.h" | |
27 | #include "llvm/ADT/Statistic.h" | |
28 | #include "llvm/IR/Constants.h" | |
29 | #include "llvm/IR/Dominators.h" | |
30 | #include "llvm/IR/Function.h" | |
31 | #include "llvm/IR/GlobalVariable.h" | |
32 | #include "llvm/IR/IRBuilder.h" | |
33 | #include "llvm/IR/InlineAsm.h" | |
34 | #include "llvm/IR/Instructions.h" | |
35 | #include "llvm/IR/IntrinsicInst.h" | |
36 | #include "llvm/IR/Module.h" | |
37 | #include "llvm/Pass.h" | |
38 | #include "llvm/Support/CommandLine.h" | |
39 | #include "llvm/Support/Debug.h" | |
40 | ||
41 | using namespace llvm; | |
42 | ||
43 | #define DEBUG_TYPE "aarch64-promote-const" | |
44 | ||
45 | // Stress testing mode - disable heuristics. | |
46 | static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden, | |
47 | cl::desc("Promote all vector constants")); | |
48 | ||
49 | STATISTIC(NumPromoted, "Number of promoted constants"); | |
50 | STATISTIC(NumPromotedUses, "Number of promoted constants uses"); | |
51 | ||
52 | //===----------------------------------------------------------------------===// | |
53 | // AArch64PromoteConstant | |
54 | //===----------------------------------------------------------------------===// | |
55 | ||
56 | namespace { | |
57 | /// Promotes interesting constant into global variables. | |
58 | /// The motivating example is: | |
59 | /// static const uint16_t TableA[32] = { | |
60 | /// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, | |
61 | /// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, | |
62 | /// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, | |
63 | /// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, | |
64 | /// }; | |
65 | /// | |
66 | /// uint8x16x4_t LoadStatic(void) { | |
67 | /// uint8x16x4_t ret; | |
68 | /// ret.val[0] = vld1q_u16(TableA + 0); | |
69 | /// ret.val[1] = vld1q_u16(TableA + 8); | |
70 | /// ret.val[2] = vld1q_u16(TableA + 16); | |
71 | /// ret.val[3] = vld1q_u16(TableA + 24); | |
72 | /// return ret; | |
73 | /// } | |
74 | /// | |
75 | /// The constants in this example are folded into the uses. Thus, 4 different | |
76 | /// constants are created. | |
77 | /// | |
78 | /// As their type is vector the cheapest way to create them is to load them | |
79 | /// for the memory. | |
80 | /// | |
81 | /// Therefore the final assembly final has 4 different loads. With this pass | |
82 | /// enabled, only one load is issued for the constants. | |
83 | class AArch64PromoteConstant : public ModulePass { | |
84 | ||
85 | public: | |
86 | static char ID; | |
87 | AArch64PromoteConstant() : ModulePass(ID) {} | |
88 | ||
89 | const char *getPassName() const override { return "AArch64 Promote Constant"; } | |
90 | ||
91 | /// Iterate over the functions and promote the interesting constants into | |
92 | /// global variables with module scope. | |
93 | bool runOnModule(Module &M) override { | |
94 | DEBUG(dbgs() << getPassName() << '\n'); | |
95 | bool Changed = false; | |
96 | for (auto &MF : M) { | |
97 | Changed |= runOnFunction(MF); | |
98 | } | |
99 | return Changed; | |
100 | } | |
101 | ||
102 | private: | |
103 | /// Look for interesting constants used within the given function. | |
104 | /// Promote them into global variables, load these global variables within | |
105 | /// the related function, so that the number of inserted load is minimal. | |
106 | bool runOnFunction(Function &F); | |
107 | ||
108 | // This transformation requires dominator info | |
109 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |
110 | AU.setPreservesCFG(); | |
111 | AU.addRequired<DominatorTreeWrapperPass>(); | |
112 | AU.addPreserved<DominatorTreeWrapperPass>(); | |
113 | } | |
114 | ||
115 | /// Type to store a list of User. | |
116 | typedef SmallVector<Value::user_iterator, 4> Users; | |
117 | /// Map an insertion point to all the uses it dominates. | |
118 | typedef DenseMap<Instruction *, Users> InsertionPoints; | |
119 | /// Map a function to the required insertion point of load for a | |
120 | /// global variable. | |
121 | typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc; | |
122 | ||
123 | /// Find the closest point that dominates the given Use. | |
124 | Instruction *findInsertionPoint(Value::user_iterator &Use); | |
125 | ||
126 | /// Check if the given insertion point is dominated by an existing | |
127 | /// insertion point. | |
128 | /// If true, the given use is added to the list of dominated uses for | |
129 | /// the related existing point. | |
130 | /// \param NewPt the insertion point to be checked | |
131 | /// \param UseIt the use to be added into the list of dominated uses | |
132 | /// \param InsertPts existing insertion points | |
133 | /// \pre NewPt and all instruction in InsertPts belong to the same function | |
134 | /// \return true if one of the insertion point in InsertPts dominates NewPt, | |
135 | /// false otherwise | |
136 | bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt, | |
137 | InsertionPoints &InsertPts); | |
138 | ||
139 | /// Check if the given insertion point can be merged with an existing | |
140 | /// insertion point in a common dominator. | |
141 | /// If true, the given use is added to the list of the created insertion | |
142 | /// point. | |
143 | /// \param NewPt the insertion point to be checked | |
144 | /// \param UseIt the use to be added into the list of dominated uses | |
145 | /// \param InsertPts existing insertion points | |
146 | /// \pre NewPt and all instruction in InsertPts belong to the same function | |
147 | /// \pre isDominated returns false for the exact same parameters. | |
148 | /// \return true if it exists an insertion point in InsertPts that could | |
149 | /// have been merged with NewPt in a common dominator, | |
150 | /// false otherwise | |
151 | bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt, | |
152 | InsertionPoints &InsertPts); | |
153 | ||
154 | /// Compute the minimal insertion points to dominates all the interesting | |
155 | /// uses of value. | |
156 | /// Insertion points are group per function and each insertion point | |
157 | /// contains a list of all the uses it dominates within the related function | |
158 | /// \param Val constant to be examined | |
159 | /// \param[out] InsPtsPerFunc output storage of the analysis | |
160 | void computeInsertionPoints(Constant *Val, | |
161 | InsertionPointsPerFunc &InsPtsPerFunc); | |
162 | ||
163 | /// Insert a definition of a new global variable at each point contained in | |
164 | /// InsPtsPerFunc and update the related uses (also contained in | |
165 | /// InsPtsPerFunc). | |
166 | bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc); | |
167 | ||
168 | /// Compute the minimal insertion points to dominate all the interesting | |
169 | /// uses of Val and insert a definition of a new global variable | |
170 | /// at these points. | |
171 | /// Also update the uses of Val accordingly. | |
172 | /// Currently a use of Val is considered interesting if: | |
173 | /// - Val is not UndefValue | |
174 | /// - Val is not zeroinitialized | |
175 | /// - Replacing Val per a load of a global variable is valid. | |
176 | /// \see shouldConvert for more details | |
177 | bool computeAndInsertDefinitions(Constant *Val); | |
178 | ||
179 | /// Promote the given constant into a global variable if it is expected to | |
180 | /// be profitable. | |
181 | /// \return true if Cst has been promoted | |
182 | bool promoteConstant(Constant *Cst); | |
183 | ||
184 | /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. | |
185 | /// Append UseIt to this list and delete the entry of IPI in InsertPts. | |
186 | static void appendAndTransferDominatedUses(Instruction *NewPt, | |
187 | Value::user_iterator &UseIt, | |
188 | InsertionPoints::iterator &IPI, | |
189 | InsertionPoints &InsertPts) { | |
190 | // Record the dominated use. | |
191 | IPI->second.push_back(UseIt); | |
192 | // Transfer the dominated uses of IPI to NewPt | |
193 | // Inserting into the DenseMap may invalidate existing iterator. | |
194 | // Keep a copy of the key to find the iterator to erase. | |
195 | Instruction *OldInstr = IPI->first; | |
85aaf69f | 196 | InsertPts[NewPt] = std::move(IPI->second); |
1a4d82fc JJ |
197 | // Erase IPI. |
198 | IPI = InsertPts.find(OldInstr); | |
199 | InsertPts.erase(IPI); | |
200 | } | |
201 | }; | |
202 | } // end anonymous namespace | |
203 | ||
204 | char AArch64PromoteConstant::ID = 0; | |
205 | ||
206 | namespace llvm { | |
207 | void initializeAArch64PromoteConstantPass(PassRegistry &); | |
208 | } | |
209 | ||
210 | INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", | |
211 | "AArch64 Promote Constant Pass", false, false) | |
212 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | |
213 | INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", | |
214 | "AArch64 Promote Constant Pass", false, false) | |
215 | ||
216 | ModulePass *llvm::createAArch64PromoteConstantPass() { | |
217 | return new AArch64PromoteConstant(); | |
218 | } | |
219 | ||
220 | /// Check if the given type uses a vector type. | |
221 | static bool isConstantUsingVectorTy(const Type *CstTy) { | |
222 | if (CstTy->isVectorTy()) | |
223 | return true; | |
224 | if (CstTy->isStructTy()) { | |
225 | for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); | |
226 | EltIdx < EndEltIdx; ++EltIdx) | |
227 | if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) | |
228 | return true; | |
229 | } else if (CstTy->isArrayTy()) | |
230 | return isConstantUsingVectorTy(CstTy->getArrayElementType()); | |
231 | return false; | |
232 | } | |
233 | ||
234 | /// Check if the given use (Instruction + OpIdx) of Cst should be converted into | |
235 | /// a load of a global variable initialized with Cst. | |
236 | /// A use should be converted if it is legal to do so. | |
237 | /// For instance, it is not legal to turn the mask operand of a shuffle vector | |
238 | /// into a load of a global variable. | |
239 | static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, | |
240 | unsigned OpIdx) { | |
241 | // shufflevector instruction expects a const for the mask argument, i.e., the | |
242 | // third argument. Do not promote this use in that case. | |
243 | if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2) | |
244 | return false; | |
245 | ||
246 | // extractvalue instruction expects a const idx. | |
247 | if (isa<const ExtractValueInst>(Instr) && OpIdx > 0) | |
248 | return false; | |
249 | ||
250 | // extractvalue instruction expects a const idx. | |
251 | if (isa<const InsertValueInst>(Instr) && OpIdx > 1) | |
252 | return false; | |
253 | ||
254 | if (isa<const AllocaInst>(Instr) && OpIdx > 0) | |
255 | return false; | |
256 | ||
257 | // Alignment argument must be constant. | |
258 | if (isa<const LoadInst>(Instr) && OpIdx > 0) | |
259 | return false; | |
260 | ||
261 | // Alignment argument must be constant. | |
262 | if (isa<const StoreInst>(Instr) && OpIdx > 1) | |
263 | return false; | |
264 | ||
265 | // Index must be constant. | |
266 | if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0) | |
267 | return false; | |
268 | ||
269 | // Personality function and filters must be constant. | |
270 | // Give up on that instruction. | |
271 | if (isa<const LandingPadInst>(Instr)) | |
272 | return false; | |
273 | ||
274 | // Switch instruction expects constants to compare to. | |
275 | if (isa<const SwitchInst>(Instr)) | |
276 | return false; | |
277 | ||
278 | // Expected address must be a constant. | |
279 | if (isa<const IndirectBrInst>(Instr)) | |
280 | return false; | |
281 | ||
282 | // Do not mess with intrinsics. | |
283 | if (isa<const IntrinsicInst>(Instr)) | |
284 | return false; | |
285 | ||
286 | // Do not mess with inline asm. | |
287 | const CallInst *CI = dyn_cast<const CallInst>(Instr); | |
288 | if (CI && isa<const InlineAsm>(CI->getCalledValue())) | |
289 | return false; | |
290 | ||
291 | return true; | |
292 | } | |
293 | ||
294 | /// Check if the given Cst should be converted into | |
295 | /// a load of a global variable initialized with Cst. | |
296 | /// A constant should be converted if it is likely that the materialization of | |
297 | /// the constant will be tricky. Thus, we give up on zero or undef values. | |
298 | /// | |
299 | /// \todo Currently, accept only vector related types. | |
300 | /// Also we give up on all simple vector type to keep the existing | |
301 | /// behavior. Otherwise, we should push here all the check of the lowering of | |
302 | /// BUILD_VECTOR. By giving up, we lose the potential benefit of merging | |
303 | /// constant via global merge and the fact that the same constant is stored | |
304 | /// only once with this method (versus, as many function that uses the constant | |
305 | /// for the regular approach, even for float). | |
306 | /// Again, the simplest solution would be to promote every | |
307 | /// constant and rematerialize them when they are actually cheap to create. | |
308 | static bool shouldConvert(const Constant *Cst) { | |
309 | if (isa<const UndefValue>(Cst)) | |
310 | return false; | |
311 | ||
312 | // FIXME: In some cases, it may be interesting to promote in memory | |
313 | // a zero initialized constant. | |
314 | // E.g., when the type of Cst require more instructions than the | |
315 | // adrp/add/load sequence or when this sequence can be shared by several | |
316 | // instances of Cst. | |
317 | // Ideally, we could promote this into a global and rematerialize the constant | |
318 | // when it was a bad idea. | |
319 | if (Cst->isZeroValue()) | |
320 | return false; | |
321 | ||
322 | if (Stress) | |
323 | return true; | |
324 | ||
325 | // FIXME: see function \todo | |
326 | if (Cst->getType()->isVectorTy()) | |
327 | return false; | |
328 | return isConstantUsingVectorTy(Cst->getType()); | |
329 | } | |
330 | ||
331 | Instruction * | |
332 | AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) { | |
333 | // If this user is a phi, the insertion point is in the related | |
334 | // incoming basic block. | |
335 | PHINode *PhiInst = dyn_cast<PHINode>(*Use); | |
336 | Instruction *InsertionPoint; | |
337 | if (PhiInst) | |
338 | InsertionPoint = | |
339 | PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator(); | |
340 | else | |
341 | InsertionPoint = dyn_cast<Instruction>(*Use); | |
342 | assert(InsertionPoint && "User is not an instruction!"); | |
343 | return InsertionPoint; | |
344 | } | |
345 | ||
346 | bool AArch64PromoteConstant::isDominated(Instruction *NewPt, | |
347 | Value::user_iterator &UseIt, | |
348 | InsertionPoints &InsertPts) { | |
349 | ||
350 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( | |
351 | *NewPt->getParent()->getParent()).getDomTree(); | |
352 | ||
353 | // Traverse all the existing insertion points and check if one is dominating | |
354 | // NewPt. If it is, remember that. | |
355 | for (auto &IPI : InsertPts) { | |
356 | if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) || | |
357 | // When IPI.first is a terminator instruction, DT may think that | |
358 | // the result is defined on the edge. | |
359 | // Here we are testing the insertion point, not the definition. | |
360 | (IPI.first->getParent() != NewPt->getParent() && | |
361 | DT.dominates(IPI.first->getParent(), NewPt->getParent()))) { | |
362 | // No need to insert this point. Just record the dominated use. | |
363 | DEBUG(dbgs() << "Insertion point dominated by:\n"); | |
364 | DEBUG(IPI.first->print(dbgs())); | |
365 | DEBUG(dbgs() << '\n'); | |
366 | IPI.second.push_back(UseIt); | |
367 | return true; | |
368 | } | |
369 | } | |
370 | return false; | |
371 | } | |
372 | ||
373 | bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, | |
374 | Value::user_iterator &UseIt, | |
375 | InsertionPoints &InsertPts) { | |
376 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( | |
377 | *NewPt->getParent()->getParent()).getDomTree(); | |
378 | BasicBlock *NewBB = NewPt->getParent(); | |
379 | ||
380 | // Traverse all the existing insertion point and check if one is dominated by | |
381 | // NewPt and thus useless or can be combined with NewPt into a common | |
382 | // dominator. | |
383 | for (InsertionPoints::iterator IPI = InsertPts.begin(), | |
384 | EndIPI = InsertPts.end(); | |
385 | IPI != EndIPI; ++IPI) { | |
386 | BasicBlock *CurBB = IPI->first->getParent(); | |
387 | if (NewBB == CurBB) { | |
388 | // Instructions are in the same block. | |
389 | // By construction, NewPt is dominating the other. | |
390 | // Indeed, isDominated returned false with the exact same arguments. | |
391 | DEBUG(dbgs() << "Merge insertion point with:\n"); | |
392 | DEBUG(IPI->first->print(dbgs())); | |
393 | DEBUG(dbgs() << "\nat considered insertion point.\n"); | |
394 | appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); | |
395 | return true; | |
396 | } | |
397 | ||
398 | // Look for a common dominator | |
399 | BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); | |
400 | // If none exists, we cannot merge these two points. | |
401 | if (!CommonDominator) | |
402 | continue; | |
403 | ||
404 | if (CommonDominator != NewBB) { | |
405 | // By construction, the CommonDominator cannot be CurBB. | |
406 | assert(CommonDominator != CurBB && | |
407 | "Instruction has not been rejected during isDominated check!"); | |
408 | // Take the last instruction of the CommonDominator as insertion point | |
409 | NewPt = CommonDominator->getTerminator(); | |
410 | } | |
411 | // else, CommonDominator is the block of NewBB, hence NewBB is the last | |
412 | // possible insertion point in that block. | |
413 | DEBUG(dbgs() << "Merge insertion point with:\n"); | |
414 | DEBUG(IPI->first->print(dbgs())); | |
415 | DEBUG(dbgs() << '\n'); | |
416 | DEBUG(NewPt->print(dbgs())); | |
417 | DEBUG(dbgs() << '\n'); | |
418 | appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts); | |
419 | return true; | |
420 | } | |
421 | return false; | |
422 | } | |
423 | ||
424 | void AArch64PromoteConstant::computeInsertionPoints( | |
425 | Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { | |
426 | DEBUG(dbgs() << "** Compute insertion points **\n"); | |
427 | for (Value::user_iterator UseIt = Val->user_begin(), | |
428 | EndUseIt = Val->user_end(); | |
429 | UseIt != EndUseIt; ++UseIt) { | |
430 | // If the user is not an Instruction, we cannot modify it. | |
431 | if (!isa<Instruction>(*UseIt)) | |
432 | continue; | |
433 | ||
434 | // Filter out uses that should not be converted. | |
435 | if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo())) | |
436 | continue; | |
437 | ||
438 | DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n"); | |
439 | DEBUG((*UseIt)->print(dbgs())); | |
440 | DEBUG(dbgs() << '\n'); | |
441 | ||
442 | Instruction *InsertionPoint = findInsertionPoint(UseIt); | |
443 | ||
444 | DEBUG(dbgs() << "Considered insertion point:\n"); | |
445 | DEBUG(InsertionPoint->print(dbgs())); | |
446 | DEBUG(dbgs() << '\n'); | |
447 | ||
448 | // Check if the current insertion point is useless, i.e., it is dominated | |
449 | // by another one. | |
450 | InsertionPoints &InsertPts = | |
451 | InsPtsPerFunc[InsertionPoint->getParent()->getParent()]; | |
452 | if (isDominated(InsertionPoint, UseIt, InsertPts)) | |
453 | continue; | |
454 | // This insertion point is useful, check if we can merge some insertion | |
455 | // point in a common dominator or if NewPt dominates an existing one. | |
456 | if (tryAndMerge(InsertionPoint, UseIt, InsertPts)) | |
457 | continue; | |
458 | ||
459 | DEBUG(dbgs() << "Keep considered insertion point\n"); | |
460 | ||
461 | // It is definitely useful by its own | |
462 | InsertPts[InsertionPoint].push_back(UseIt); | |
463 | } | |
464 | } | |
465 | ||
466 | bool AArch64PromoteConstant::insertDefinitions( | |
467 | Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc) { | |
468 | // We will create one global variable per Module. | |
469 | DenseMap<Module *, GlobalVariable *> ModuleToMergedGV; | |
470 | bool HasChanged = false; | |
471 | ||
472 | // Traverse all insertion points in all the function. | |
473 | for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(), | |
474 | EndIt = InsPtsPerFunc.end(); | |
475 | FctToInstPtsIt != EndIt; ++FctToInstPtsIt) { | |
476 | InsertionPoints &InsertPts = FctToInstPtsIt->second; | |
477 | // Do more checking for debug purposes. | |
478 | #ifndef NDEBUG | |
479 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( | |
480 | *FctToInstPtsIt->first).getDomTree(); | |
481 | #endif | |
482 | GlobalVariable *PromotedGV; | |
483 | assert(!InsertPts.empty() && "Empty uses does not need a definition"); | |
484 | ||
485 | Module *M = FctToInstPtsIt->first->getParent(); | |
486 | DenseMap<Module *, GlobalVariable *>::iterator MapIt = | |
487 | ModuleToMergedGV.find(M); | |
488 | if (MapIt == ModuleToMergedGV.end()) { | |
489 | PromotedGV = new GlobalVariable( | |
490 | *M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr, | |
491 | "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); | |
492 | PromotedGV->setInitializer(Cst); | |
493 | ModuleToMergedGV[M] = PromotedGV; | |
494 | DEBUG(dbgs() << "Global replacement: "); | |
495 | DEBUG(PromotedGV->print(dbgs())); | |
496 | DEBUG(dbgs() << '\n'); | |
497 | ++NumPromoted; | |
498 | HasChanged = true; | |
499 | } else { | |
500 | PromotedGV = MapIt->second; | |
501 | } | |
502 | ||
503 | for (InsertionPoints::iterator IPI = InsertPts.begin(), | |
504 | EndIPI = InsertPts.end(); | |
505 | IPI != EndIPI; ++IPI) { | |
506 | // Create the load of the global variable. | |
507 | IRBuilder<> Builder(IPI->first->getParent(), IPI->first); | |
508 | LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV); | |
509 | DEBUG(dbgs() << "**********\n"); | |
510 | DEBUG(dbgs() << "New def: "); | |
511 | DEBUG(LoadedCst->print(dbgs())); | |
512 | DEBUG(dbgs() << '\n'); | |
513 | ||
514 | // Update the dominated uses. | |
515 | Users &DominatedUsers = IPI->second; | |
516 | for (Value::user_iterator Use : DominatedUsers) { | |
517 | #ifndef NDEBUG | |
518 | assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) || | |
519 | (isa<PHINode>(*Use) && | |
520 | DT.dominates(LoadedCst, findInsertionPoint(Use)))) && | |
521 | "Inserted definition does not dominate all its uses!"); | |
522 | #endif | |
523 | DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":"); | |
524 | DEBUG(Use->print(dbgs())); | |
525 | DEBUG(dbgs() << '\n'); | |
526 | Use->setOperand(Use.getOperandNo(), LoadedCst); | |
527 | ++NumPromotedUses; | |
528 | } | |
529 | } | |
530 | } | |
531 | return HasChanged; | |
532 | } | |
533 | ||
534 | bool AArch64PromoteConstant::computeAndInsertDefinitions(Constant *Val) { | |
535 | InsertionPointsPerFunc InsertPtsPerFunc; | |
536 | computeInsertionPoints(Val, InsertPtsPerFunc); | |
537 | return insertDefinitions(Val, InsertPtsPerFunc); | |
538 | } | |
539 | ||
540 | bool AArch64PromoteConstant::promoteConstant(Constant *Cst) { | |
541 | assert(Cst && "Given variable is not a valid constant."); | |
542 | ||
543 | if (!shouldConvert(Cst)) | |
544 | return false; | |
545 | ||
546 | DEBUG(dbgs() << "******************************\n"); | |
547 | DEBUG(dbgs() << "Candidate constant: "); | |
548 | DEBUG(Cst->print(dbgs())); | |
549 | DEBUG(dbgs() << '\n'); | |
550 | ||
551 | return computeAndInsertDefinitions(Cst); | |
552 | } | |
553 | ||
554 | bool AArch64PromoteConstant::runOnFunction(Function &F) { | |
555 | // Look for instructions using constant vector. Promote that constant to a | |
556 | // global variable. Create as few loads of this variable as possible and | |
557 | // update the uses accordingly. | |
558 | bool LocalChange = false; | |
559 | SmallSet<Constant *, 8> AlreadyChecked; | |
560 | ||
561 | for (auto &MBB : F) { | |
562 | for (auto &MI : MBB) { | |
563 | // Traverse the operand, looking for constant vectors. Replace them by a | |
564 | // load of a global variable of constant vector type. | |
565 | for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); | |
566 | OpIdx != EndOpIdx; ++OpIdx) { | |
567 | Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx)); | |
568 | // There is no point in promoting global values as they are already | |
569 | // global. Do not promote constant expressions either, as they may | |
570 | // require some code expansion. | |
571 | if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) && | |
85aaf69f | 572 | AlreadyChecked.insert(Cst).second) |
1a4d82fc JJ |
573 | LocalChange |= promoteConstant(Cst); |
574 | } | |
575 | } | |
576 | } | |
577 | return LocalChange; | |
578 | } |