]> git.proxmox.com Git - rustc.git/blob - src/llvm/lib/Transforms/Scalar/NullCheckElimination.cpp
Imported Upstream version 1.0.0+dfsg1
[rustc.git] / src / llvm / lib / Transforms / Scalar / NullCheckElimination.cpp
1 //===-- NullCheckElimination.cpp - Null Check Elimination Pass ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "llvm/Transforms/Scalar.h"
11 #include "llvm/ADT/DenseSet.h"
12 #include "llvm/ADT/SmallPtrSet.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/Function.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Pass.h"
18 using namespace llvm;
19
20 #define DEBUG_TYPE "null-check-elimination"
21
22 namespace {
23 struct NullCheckElimination : public FunctionPass {
24 static char ID;
25 NullCheckElimination() : FunctionPass(ID) {
26 initializeNullCheckEliminationPass(*PassRegistry::getPassRegistry());
27 }
28 bool runOnFunction(Function &F) override;
29
30 void getAnalysisUsage(AnalysisUsage &AU) const override {
31 AU.setPreservesCFG();
32 }
33
34 private:
35 static const unsigned kPhiLimit = 16;
36 typedef SmallPtrSet<PHINode*, kPhiLimit> SmallPhiSet;
37
38 enum CmpKind {
39 /// A null check of an unconditionally nonnull-or-poison value.
40 NullCheckDefiniteCmp,
41
42 /// A null check of the phi representing a nontrivial inbounds recurrence,
43 /// which is not known to be unconditionally nonnull-or-poison.
44 NullCheckRecurrenceCmp,
45
46 /// A comparison of the phi representing a nontrivial inbounds recurrence
47 /// with an inbounds GEP derived from the base of the recurrence, which
48 /// will typically represent a bound on the recurrence.
49 RecurrencePhiBoundCmp,
50 };
51
52 enum CmpPred {
53 CmpEq,
54 CmpNe,
55 };
56
57 struct CmpDesc {
58 CmpDesc(CmpKind k, CmpPred p, Use *u, Value *v)
59 : kind(k), pred(p), use(u), ptrValue(v) { }
60
61 CmpKind kind;
62 CmpPred pred;
63 Use *use;
64 Value *ptrValue;
65 };
66
67 typedef SmallVector<CmpDesc, 4> CmpDescVec;
68
69 bool isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis, PHINode*);
70 Value *isNontrivialInBoundsRecurrence(PHINode*);
71
72 bool classifyCmp(CmpDescVec*, Use*);
73 bool findRelevantCmps(CmpDescVec*, Use*);
74
75 bool blockContainsLoadDerivedFrom(BasicBlock*, Value*);
76
77 /// Tracks values that are unconditionally nonnull-or-poison by definition,
78 /// but not values that are known nonnull-or-poison in a given context by
79 /// their uses, e.g. in recurrences.
80 DenseSet<Value*> NonNullOrPoisonValues;
81
82 /// Tracks values that are bases of nontrivial inbounds recurrences.
83 DenseSet<Value*> InBoundsRecurrenceBases;
84
85 /// Maps phis that correspond to nontrivial inbounds recurrences to their
86 /// base values.
87 DenseMap<Value*, Value*> InBoundsRecurrenceBaseMap;
88 };
89 }
90
91 char NullCheckElimination::ID = 0;
92 INITIALIZE_PASS_BEGIN(NullCheckElimination,
93 "null-check-elimination",
94 "Null Check Elimination",
95 false, false)
96 INITIALIZE_PASS_END(NullCheckElimination,
97 "null-check-elimination",
98 "Null Check Elimination",
99 false, false)
100
101 FunctionPass *llvm::createNullCheckEliminationPass() {
102 return new NullCheckElimination();
103 }
104
105 static GetElementPtrInst *castToInBoundsGEP(Value *V) {
106 auto *GEP = dyn_cast<GetElementPtrInst>(V);
107 if (!GEP || !GEP->isInBounds())
108 return nullptr;
109 return GEP;
110 }
111
112 static bool isZeroConstant(Value *V) {
113 auto *C = dyn_cast<Constant>(V);
114 return C && C->isZeroValue();
115 }
116
117 bool NullCheckElimination::runOnFunction(Function &F) {
118 if (skipOptnoneFunction(F))
119 return false;
120
121 bool Changed = false;
122
123 // Collect arguments with the `nonnull` attribute.
124 for (auto &Arg : F.args()) {
125 if (Arg.hasNonNullAttr())
126 NonNullOrPoisonValues.insert(&Arg);
127 }
128
129 // Collect instructions that definitely produce nonnull-or-poison values. At
130 // the moment, this is restricted to inbounds GEPs, and phis that are derived
131 // entirely from nonnull-or-poison-values (including other phis that are
132 // themselves derived from the same).
133 for (auto &BB : F) {
134 for (auto &I : BB) {
135 if (auto *GEP = castToInBoundsGEP(&I)) {
136 NonNullOrPoisonValues.insert(GEP);
137 } else if (auto *PN = dyn_cast<PHINode>(&I)) {
138 SmallPhiSet VisitedPHIs;
139 if (isNonNullOrPoisonPhi(&VisitedPHIs, PN))
140 NonNullOrPoisonValues.insert(PN);
141
142 if (auto *BaseV = isNontrivialInBoundsRecurrence(PN)) {
143 InBoundsRecurrenceBases.insert(BaseV);
144 InBoundsRecurrenceBaseMap[PN] = BaseV;
145 }
146 }
147 }
148 }
149
150 for (auto &BB : F) {
151 // This could also be extended to handle SwitchInst, but using a SwitchInst
152 // for a null check seems unlikely.
153 auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
154 if (!BI || BI->isUnconditional())
155 continue;
156
157 // The first operand of a conditional branch is the condition.
158 CmpDescVec Cmps;
159 if (!findRelevantCmps(&Cmps, &BI->getOperandUse(0)))
160 continue;
161
162 for (auto &Cmp : Cmps) {
163 // We are only tracking comparisons of inbounds recurrence phis with their
164 // bounds so that we can eliminate null checks based on them, which are of
165 // kind NullCheckRecurrenceCmp. We can't use a lone RecurrencePhiBoundCmp
166 // to perform any optimizations.
167 if (Cmp.kind == RecurrencePhiBoundCmp)
168 continue;
169
170 if (Cmp.kind == NullCheckRecurrenceCmp) {
171 // Look for a matching RecurrencePhiBoundCmp. If one exists, then we can
172 // be sure that this branch condition depends on the recurrence. Since
173 // both the bounds and the recurrence successor value are inbounds, and
174 // they are both derived from the base, the base being null would imply
175 // that the bounds and recurrence successor values are poison.
176 bool FoundMatchingCmp = false;
177 for (auto &OtherCmp : Cmps) {
178 if (OtherCmp.kind == RecurrencePhiBoundCmp &&
179 OtherCmp.ptrValue == Cmp.ptrValue) {
180 FoundMatchingCmp = true;
181 break;
182 }
183 }
184 if (!FoundMatchingCmp)
185 continue;
186 }
187
188 BasicBlock *NonNullBB;
189 if (Cmp.pred == CmpEq) {
190 // If the comparison instruction is checking for equality with null then
191 // the pointer is nonnull on the `false` branch.
192 NonNullBB = BI->getSuccessor(1);
193 } else {
194 // Otherwise, if the comparison instruction is checking for inequality
195 // with null, the pointer is nonnull on the `true` branch.
196 NonNullBB = BI->getSuccessor(0);
197 }
198
199 // This is a crude approximation of control dependence: if the branch
200 // target has a single predecessor edge, then it must be control-
201 // dependent on the branch.
202 if (!NonNullBB->getSinglePredecessor())
203 continue;
204
205 // Due to the semantics of poison values in LLVM, we have to check that
206 // there is actually some externally visible side effect that is dependent
207 // on the poison value. Since poison values are otherwise treated as
208 // undef, and a load of undef is undefined behavior (which is externally
209 // visible), it suffices to look for a load of the nonnull-or-poison
210 // value.
211 //
212 // This could be extended to any block control-dependent on this branch of
213 // the null check, it's unclear if that will actually catch more cases in
214 // real code.
215 if (blockContainsLoadDerivedFrom(NonNullBB, Cmp.ptrValue)) {
216 Type *BoolTy = Type::getInt1Ty(F.getContext());
217 Value *NewV = ConstantInt::get(BoolTy, Cmp.pred == CmpNe);
218 Cmp.use->set(NewV);
219 Changed = true;
220 }
221 }
222 }
223
224 NonNullOrPoisonValues.clear();
225 InBoundsRecurrenceBases.clear();
226 InBoundsRecurrenceBaseMap.clear();
227
228 return Changed;
229 }
230
231 /// Checks whether a phi is derived from known nonnnull-or-poison values,
232 /// including other phis that are derived from the same. May return `false`
233 /// conservatively in some cases, e.g. if exploring a large cycle of phis.
234 ///
235 /// This function may also insert any inbounds GEPs that it finds into
236 /// NonNullOrPoisonValues.
237 bool
238 NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet *VisitedPhis,
239 PHINode *PN) {
240 // If we've already seen this phi, return `true`, even though it may not be
241 // nonnull, since some other operand in a cycle of phis may invalidate the
242 // optimistic assumption that the entire cycle is nonnull, including this phi.
243 if (!VisitedPhis->insert(PN).second)
244 return true;
245
246 // Use a sensible limit to avoid iterating over long chains of phis that are
247 // unlikely to be nonnull.
248 if (VisitedPhis->size() >= kPhiLimit)
249 return false;
250
251 unsigned numOperands = PN->getNumOperands();
252 for (unsigned i = 0; i < numOperands; ++i) {
253 Value *SrcValue = PN->getOperand(i);
254 if (NonNullOrPoisonValues.count(SrcValue)) {
255 continue;
256 } else if (auto *GEP = castToInBoundsGEP(SrcValue)) {
257 NonNullOrPoisonValues.insert(GEP);
258 } else if (auto *SrcPN = dyn_cast<PHINode>(SrcValue)) {
259 if (!isNonNullOrPoisonPhi(VisitedPhis, SrcPN))
260 return false;
261 } else {
262 return false;
263 }
264 }
265
266 return true;
267 }
268
269 /// Determines whether a phi corresponds to an inbounds recurrence where the
270 /// base is not a known nonnull-or-poison value. Returns the base value, or
271 /// null if the phi doesn't correspond to such a recurrence.
272 Value *NullCheckElimination::isNontrivialInBoundsRecurrence(PHINode *PN) {
273 if (PN->getNumOperands() != 2)
274 return nullptr;
275
276 Value *BaseV;
277 GetElementPtrInst *SuccessorI;
278 if (auto *GEP = castToInBoundsGEP(PN->getOperand(0))) {
279 BaseV = PN->getOperand(1);
280 SuccessorI = GEP;
281 } else if (auto *GEP = castToInBoundsGEP(PN->getOperand(1))) {
282 BaseV = PN->getOperand(0);
283 SuccessorI = GEP;
284 } else {
285 return nullptr;
286 }
287
288 if (NonNullOrPoisonValues.count(BaseV) || SuccessorI->getOperand(0) != PN)
289 return nullptr;
290
291 return BaseV;
292 }
293
294 /// Determines whether an ICmpInst is one of the forms that is relevant to
295 /// null check elimination, and then adds a CmpDesc to Cmps when applicable.
296 /// The ICmpInst is passed as a Use so this Use can be placed into the CmpDesc,
297 /// but the Use parameter must be a Use of an ICmpInst.
298 bool NullCheckElimination::classifyCmp(CmpDescVec *Cmps, Use *U) {
299 auto *CI = cast<ICmpInst>(U);
300 if (!CI->isEquality())
301 return false;
302
303 CmpPred Pred = (CI->getPredicate() == llvm::CmpInst::ICMP_EQ) ? CmpEq : CmpNe;
304 Value *Op0 = CI->getOperand(0);
305 Value *Op1 = CI->getOperand(1);
306
307 if (NonNullOrPoisonValues.count(Op0)) {
308 if (isZeroConstant(Op1)) {
309 Cmps->push_back(CmpDesc(NullCheckDefiniteCmp, Pred, U, Op0));
310 return true;
311 }
312
313 auto it = InBoundsRecurrenceBaseMap.find(Op1);
314 if (it == InBoundsRecurrenceBaseMap.end())
315 return false;
316
317 auto *GEP = castToInBoundsGEP(Op0);
318 if (!GEP)
319 return false;
320
321 auto *BaseV = it->second;
322 if (GEP->getOperand(0) != BaseV)
323 return false;
324
325 Cmps->push_back(CmpDesc(RecurrencePhiBoundCmp, Pred, U, Op1));
326 return true;
327 }
328
329 // Since InstCombine or InstSimplify should have canonicalized a comparison
330 // with `null` to have the `null` in the second operand, we don't need to
331 // handle the case where Op0 is `null` like we did with Op1 above.
332 if (NonNullOrPoisonValues.count(Op1)) {
333 auto it = InBoundsRecurrenceBaseMap.find(Op0);
334 if (it == InBoundsRecurrenceBaseMap.end())
335 return false;
336
337 auto *GEP = castToInBoundsGEP(Op1);
338 if (!GEP)
339 return false;
340
341 auto *BaseV = it->second;
342 if (GEP->getOperand(0) != BaseV)
343 return false;
344
345 Cmps->push_back(CmpDesc(RecurrencePhiBoundCmp, Pred, U, Op0));
346 return true;
347 }
348
349 if (InBoundsRecurrenceBaseMap.count(Op0)) {
350 if (isZeroConstant(Op1)) {
351 Cmps->push_back(CmpDesc(NullCheckRecurrenceCmp, Pred, U, Op0));
352 return true;
353 }
354 }
355
356 return false;
357 }
358
359 /// Classifies the comparisons that are relevant to null check elimination,
360 /// starting from a Use. The CmpDescs of the comparisons are collected in Cmps.
361 bool NullCheckElimination::findRelevantCmps(CmpDescVec *Cmps, Use *U) {
362 auto *I = dyn_cast<Instruction>(U->get());
363 if (!I)
364 return false;
365
366 if (isa<ICmpInst>(I))
367 return classifyCmp(Cmps, U);
368
369 unsigned Opcode = I->getOpcode();
370 if (Opcode == Instruction::Or || Opcode == Instruction::And) {
371 bool FoundCmps = findRelevantCmps(Cmps, &I->getOperandUse(0));
372 FoundCmps |= findRelevantCmps(Cmps, &I->getOperandUse(1));
373 return FoundCmps;
374 }
375
376 return false;
377 }
378
379 /// Determines whether `BB` contains a load from `PtrV`, or any inbounds GEP
380 /// derived from `PtrV`.
381 bool
382 NullCheckElimination::blockContainsLoadDerivedFrom(BasicBlock *BB,
383 Value *PtrV) {
384 for (auto &I : *BB) {
385 auto *LI = dyn_cast<LoadInst>(&I);
386 if (!LI)
387 continue;
388
389 Value *V = LI->getPointerOperand();
390 while (1) {
391 if (V == PtrV)
392 return true;
393
394 auto *GEP = castToInBoundsGEP(V);
395 if (!GEP)
396 break;
397
398 V = GEP->getOperand(0);
399 }
400 }
401
402 return false;
403 }
404