1 //===-- NullCheckElimination.cpp - Null Check Elimination Pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Transforms/Scalar.h"
11 #include "llvm/ADT/DenseSet.h"
12 #include "llvm/ADT/SmallPtrSet.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/Function.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Pass.h"
20 #define DEBUG_TYPE "null-check-elimination"
23 struct NullCheckElimination
: public FunctionPass
{
25 NullCheckElimination() : FunctionPass(ID
) {
26 initializeNullCheckEliminationPass(*PassRegistry::getPassRegistry());
28 bool runOnFunction(Function
&F
) override
;
30 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
35 static const unsigned kPhiLimit
= 16;
36 typedef SmallPtrSet
<PHINode
*, kPhiLimit
> SmallPhiSet
;
39 /// A null check of an unconditionally nonnull-or-poison value.
42 /// A null check of the phi representing a nontrivial inbounds recurrence,
43 /// which is not known to be unconditionally nonnull-or-poison.
44 NullCheckRecurrenceCmp
,
46 /// A comparison of the phi representing a nontrivial inbounds recurrence
47 /// with an inbounds GEP derived from the base of the recurrence, which
48 /// will typically represent a bound on the recurrence.
49 RecurrencePhiBoundCmp
,
58 CmpDesc(CmpKind k
, CmpPred p
, Use
*u
, Value
*v
)
59 : kind(k
), pred(p
), use(u
), ptrValue(v
) { }
67 typedef SmallVector
<CmpDesc
, 4> CmpDescVec
;
69 bool isNonNullOrPoisonPhi(SmallPhiSet
*VisitedPhis
, PHINode
*);
70 Value
*isNontrivialInBoundsRecurrence(PHINode
*);
72 bool classifyCmp(CmpDescVec
*, Use
*);
73 bool findRelevantCmps(CmpDescVec
*, Use
*);
75 bool blockContainsLoadDerivedFrom(BasicBlock
*, Value
*);
77 /// Tracks values that are unconditionally nonnull-or-poison by definition,
78 /// but not values that are known nonnull-or-poison in a given context by
79 /// their uses, e.g. in recurrences.
80 DenseSet
<Value
*> NonNullOrPoisonValues
;
82 /// Tracks values that are bases of nontrivial inbounds recurrences.
83 DenseSet
<Value
*> InBoundsRecurrenceBases
;
85 /// Maps phis that correspond to nontrivial inbounds recurrences to their
87 DenseMap
<Value
*, Value
*> InBoundsRecurrenceBaseMap
;
91 char NullCheckElimination::ID
= 0;
92 INITIALIZE_PASS_BEGIN(NullCheckElimination
,
93 "null-check-elimination",
94 "Null Check Elimination",
96 INITIALIZE_PASS_END(NullCheckElimination
,
97 "null-check-elimination",
98 "Null Check Elimination",
101 FunctionPass
*llvm::createNullCheckEliminationPass() {
102 return new NullCheckElimination();
105 static GetElementPtrInst
*castToInBoundsGEP(Value
*V
) {
106 auto *GEP
= dyn_cast
<GetElementPtrInst
>(V
);
107 if (!GEP
|| !GEP
->isInBounds())
112 static bool isZeroConstant(Value
*V
) {
113 auto *C
= dyn_cast
<Constant
>(V
);
114 return C
&& C
->isZeroValue();
117 bool NullCheckElimination::runOnFunction(Function
&F
) {
118 if (skipOptnoneFunction(F
))
121 bool Changed
= false;
123 // Collect arguments with the `nonnull` attribute.
124 for (auto &Arg
: F
.args()) {
125 if (Arg
.hasNonNullAttr())
126 NonNullOrPoisonValues
.insert(&Arg
);
129 // Collect instructions that definitely produce nonnull-or-poison values. At
130 // the moment, this is restricted to inbounds GEPs, and phis that are derived
131 // entirely from nonnull-or-poison-values (including other phis that are
132 // themselves derived from the same).
135 if (auto *GEP
= castToInBoundsGEP(&I
)) {
136 NonNullOrPoisonValues
.insert(GEP
);
137 } else if (auto *PN
= dyn_cast
<PHINode
>(&I
)) {
138 SmallPhiSet VisitedPHIs
;
139 if (isNonNullOrPoisonPhi(&VisitedPHIs
, PN
))
140 NonNullOrPoisonValues
.insert(PN
);
142 if (auto *BaseV
= isNontrivialInBoundsRecurrence(PN
)) {
143 InBoundsRecurrenceBases
.insert(BaseV
);
144 InBoundsRecurrenceBaseMap
[PN
] = BaseV
;
151 // This could also be extended to handle SwitchInst, but using a SwitchInst
152 // for a null check seems unlikely.
153 auto *BI
= dyn_cast
<BranchInst
>(BB
.getTerminator());
154 if (!BI
|| BI
->isUnconditional())
157 // The first operand of a conditional branch is the condition.
159 if (!findRelevantCmps(&Cmps
, &BI
->getOperandUse(0)))
162 for (auto &Cmp
: Cmps
) {
163 // We are only tracking comparisons of inbounds recurrence phis with their
164 // bounds so that we can eliminate null checks based on them, which are of
165 // kind NullCheckRecurrenceCmp. We can't use a lone RecurrencePhiBoundCmp
166 // to perform any optimizations.
167 if (Cmp
.kind
== RecurrencePhiBoundCmp
)
170 if (Cmp
.kind
== NullCheckRecurrenceCmp
) {
171 // Look for a matching RecurrencePhiBoundCmp. If one exists, then we can
172 // be sure that this branch condition depends on the recurrence. Since
173 // both the bounds and the recurrence successor value are inbounds, and
174 // they are both derived from the base, the base being null would imply
175 // that the bounds and recurrence successor values are poison.
176 bool FoundMatchingCmp
= false;
177 for (auto &OtherCmp
: Cmps
) {
178 if (OtherCmp
.kind
== RecurrencePhiBoundCmp
&&
179 OtherCmp
.ptrValue
== Cmp
.ptrValue
) {
180 FoundMatchingCmp
= true;
184 if (!FoundMatchingCmp
)
188 BasicBlock
*NonNullBB
;
189 if (Cmp
.pred
== CmpEq
) {
190 // If the comparison instruction is checking for equality with null then
191 // the pointer is nonnull on the `false` branch.
192 NonNullBB
= BI
->getSuccessor(1);
194 // Otherwise, if the comparison instruction is checking for inequality
195 // with null, the pointer is nonnull on the `true` branch.
196 NonNullBB
= BI
->getSuccessor(0);
199 // This is a crude approximation of control dependence: if the branch
200 // target has a single predecessor edge, then it must be control-
201 // dependent on the branch.
202 if (!NonNullBB
->getSinglePredecessor())
205 // Due to the semantics of poison values in LLVM, we have to check that
206 // there is actually some externally visible side effect that is dependent
207 // on the poison value. Since poison values are otherwise treated as
208 // undef, and a load of undef is undefined behavior (which is externally
209 // visible), it suffices to look for a load of the nonnull-or-poison
212 // This could be extended to any block control-dependent on this branch of
213 // the null check, it's unclear if that will actually catch more cases in
215 if (blockContainsLoadDerivedFrom(NonNullBB
, Cmp
.ptrValue
)) {
216 Type
*BoolTy
= Type::getInt1Ty(F
.getContext());
217 Value
*NewV
= ConstantInt::get(BoolTy
, Cmp
.pred
== CmpNe
);
224 NonNullOrPoisonValues
.clear();
225 InBoundsRecurrenceBases
.clear();
226 InBoundsRecurrenceBaseMap
.clear();
231 /// Checks whether a phi is derived from known nonnnull-or-poison values,
232 /// including other phis that are derived from the same. May return `false`
233 /// conservatively in some cases, e.g. if exploring a large cycle of phis.
235 /// This function may also insert any inbounds GEPs that it finds into
236 /// NonNullOrPoisonValues.
238 NullCheckElimination::isNonNullOrPoisonPhi(SmallPhiSet
*VisitedPhis
,
240 // If we've already seen this phi, return `true`, even though it may not be
241 // nonnull, since some other operand in a cycle of phis may invalidate the
242 // optimistic assumption that the entire cycle is nonnull, including this phi.
243 if (!VisitedPhis
->insert(PN
).second
)
246 // Use a sensible limit to avoid iterating over long chains of phis that are
247 // unlikely to be nonnull.
248 if (VisitedPhis
->size() >= kPhiLimit
)
251 unsigned numOperands
= PN
->getNumOperands();
252 for (unsigned i
= 0; i
< numOperands
; ++i
) {
253 Value
*SrcValue
= PN
->getOperand(i
);
254 if (NonNullOrPoisonValues
.count(SrcValue
)) {
256 } else if (auto *GEP
= castToInBoundsGEP(SrcValue
)) {
257 NonNullOrPoisonValues
.insert(GEP
);
258 } else if (auto *SrcPN
= dyn_cast
<PHINode
>(SrcValue
)) {
259 if (!isNonNullOrPoisonPhi(VisitedPhis
, SrcPN
))
269 /// Determines whether a phi corresponds to an inbounds recurrence where the
270 /// base is not a known nonnull-or-poison value. Returns the base value, or
271 /// null if the phi doesn't correspond to such a recurrence.
272 Value
*NullCheckElimination::isNontrivialInBoundsRecurrence(PHINode
*PN
) {
273 if (PN
->getNumOperands() != 2)
277 GetElementPtrInst
*SuccessorI
;
278 if (auto *GEP
= castToInBoundsGEP(PN
->getOperand(0))) {
279 BaseV
= PN
->getOperand(1);
281 } else if (auto *GEP
= castToInBoundsGEP(PN
->getOperand(1))) {
282 BaseV
= PN
->getOperand(0);
288 if (NonNullOrPoisonValues
.count(BaseV
) || SuccessorI
->getOperand(0) != PN
)
294 /// Determines whether an ICmpInst is one of the forms that is relevant to
295 /// null check elimination, and then adds a CmpDesc to Cmps when applicable.
296 /// The ICmpInst is passed as a Use so this Use can be placed into the CmpDesc,
297 /// but the Use parameter must be a Use of an ICmpInst.
298 bool NullCheckElimination::classifyCmp(CmpDescVec
*Cmps
, Use
*U
) {
299 auto *CI
= cast
<ICmpInst
>(U
);
300 if (!CI
->isEquality())
303 CmpPred Pred
= (CI
->getPredicate() == llvm::CmpInst::ICMP_EQ
) ? CmpEq
: CmpNe
;
304 Value
*Op0
= CI
->getOperand(0);
305 Value
*Op1
= CI
->getOperand(1);
307 if (NonNullOrPoisonValues
.count(Op0
)) {
308 if (isZeroConstant(Op1
)) {
309 Cmps
->push_back(CmpDesc(NullCheckDefiniteCmp
, Pred
, U
, Op0
));
313 auto it
= InBoundsRecurrenceBaseMap
.find(Op1
);
314 if (it
== InBoundsRecurrenceBaseMap
.end())
317 auto *GEP
= castToInBoundsGEP(Op0
);
321 auto *BaseV
= it
->second
;
322 if (GEP
->getOperand(0) != BaseV
)
325 Cmps
->push_back(CmpDesc(RecurrencePhiBoundCmp
, Pred
, U
, Op1
));
329 // Since InstCombine or InstSimplify should have canonicalized a comparison
330 // with `null` to have the `null` in the second operand, we don't need to
331 // handle the case where Op0 is `null` like we did with Op1 above.
332 if (NonNullOrPoisonValues
.count(Op1
)) {
333 auto it
= InBoundsRecurrenceBaseMap
.find(Op0
);
334 if (it
== InBoundsRecurrenceBaseMap
.end())
337 auto *GEP
= castToInBoundsGEP(Op1
);
341 auto *BaseV
= it
->second
;
342 if (GEP
->getOperand(0) != BaseV
)
345 Cmps
->push_back(CmpDesc(RecurrencePhiBoundCmp
, Pred
, U
, Op0
));
349 if (InBoundsRecurrenceBaseMap
.count(Op0
)) {
350 if (isZeroConstant(Op1
)) {
351 Cmps
->push_back(CmpDesc(NullCheckRecurrenceCmp
, Pred
, U
, Op0
));
359 /// Classifies the comparisons that are relevant to null check elimination,
360 /// starting from a Use. The CmpDescs of the comparisons are collected in Cmps.
361 bool NullCheckElimination::findRelevantCmps(CmpDescVec
*Cmps
, Use
*U
) {
362 auto *I
= dyn_cast
<Instruction
>(U
->get());
366 if (isa
<ICmpInst
>(I
))
367 return classifyCmp(Cmps
, U
);
369 unsigned Opcode
= I
->getOpcode();
370 if (Opcode
== Instruction::Or
|| Opcode
== Instruction::And
) {
371 bool FoundCmps
= findRelevantCmps(Cmps
, &I
->getOperandUse(0));
372 FoundCmps
|= findRelevantCmps(Cmps
, &I
->getOperandUse(1));
379 /// Determines whether `BB` contains a load from `PtrV`, or any inbounds GEP
380 /// derived from `PtrV`.
382 NullCheckElimination::blockContainsLoadDerivedFrom(BasicBlock
*BB
,
384 for (auto &I
: *BB
) {
385 auto *LI
= dyn_cast
<LoadInst
>(&I
);
389 Value
*V
= LI
->getPointerOperand();
394 auto *GEP
= castToInBoundsGEP(V
);
398 V
= GEP
->getOperand(0);