//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;
  // DataLayout if available, or null.
  const DataLayout *const DL;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// The cache of @llvm.assume intrinsics.
  AssumptionCacheTracker *ACT;
  // The called function.
  Function &F;

  int Threshold;
  int Cost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int FiftyPercentVectorBonus, TenPercentVectorBonus;
  int VectorBonus;
  // While we walk the potentially-inlined instructions, we build up and
  // maintain a mapping of simplified values specific to this callsite. The
  // idea is to propagate any special information we have about arguments to
  // this call through the inlinable section of the function, and account for
  // likely simplifications post-inlining. The most important aspect we track
  // is CFG altering simplifications -- when we prove a basic block dead, that
  // can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  // Keep track of the values which map back (through function arguments) to
  // allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  // The mapping of caller Alloca values to their accumulated cost savings. If
  // we have to disable SROA for one of the allocas, this tells us how much
  // cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  // Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
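  // Editorial illustration (not part of the original source): if the caller
  // passes a pointer like `&Arr[4]` into an `i32` array, the corresponding
  // formal argument is recorded here as the pair (Arr, 16) on a target with
  // 4-byte i32, which is what lets later GEPs, icmps, and subtracts over
  // that pointer fold to constants during this analysis.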
  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *); void visit(Module &);
  void visit(Function *); void visit(Function &);
  void visit(BasicBlock *); void visit(BasicBlock &);
  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);
public:
  CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
               AssumptionCacheTracker *ACT, Function &Callee, int Threshold)
      : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
        FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}
  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace
/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}
/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}
/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}
/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}
/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}
/// \brief Check whether a GEP's indices are all constant.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
      return false;

  return true;
}
/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  if (!DL)
    return false;

  unsigned IntPtrWidth = DL->getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL->getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
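// Editorial worked example (illustrative values, not from the original
// source): for a GEP into `struct S { i32 a; i64 b; }` with indices (0, 1)
// on a typical 64-bit target, the struct-index branch adds
// SL->getElementOffset(1) == 8 to Offset, while an array index of 3 into
// i64 elements would instead add 3 * 8 == 24 via the TypeSize
// multiplication above.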
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca, and handle that case.
  if (I.isArrayAllocation()) {
    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
      assert(AllocSize && "Allocation size not a constant int?");
      Type *Ty = I.getAllocatedType();
      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
                      Ty->getPrimitiveSizeInBits());
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}
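// Editorial note on the accounting above (illustrative): for
// `%a = alloca i32, i32 16` whose size operand has simplified to the
// constant 16, AllocatedSize grows by Ty->getPrimitiveSizeInBits() * 16; a
// plain static alloca instead contributes DL->getTypeAllocSize(Ty) when
// DataLayout is available.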
bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}
bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
                                            SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (DL && I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return false;
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here, we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  if (isGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return false;
}
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset
    = ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  const DataLayout *DL = I.getDataLayout();
  // Propagate constants through ptrtoint.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset
      = ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  const DataLayout *DL = I.getDataLayout();
  // Propagate constants through inttoptr.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Operand);
  if (!COp)
    COp = SimplifiedValues.lookup(Operand);
  if (COp)
    if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
                                               COp, DL)) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // for any alloca-derived argument.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
    if (isAllocaDerivedArg(I.getOperand(0))) {
      // We can actually predict the result of comparisons between an
      // alloca-derived value and null. Note that this fires regardless of
      // SROA firing.
      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                        : ConstantInt::getFalse(I.getType());
      return true;
    }

  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
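// Editorial illustration of the common-base fold above (hypothetical IR in
// the pre-3.7 textual syntax):
//   %p = getelementptr inbounds i32* %base, i64 1
//   %q = getelementptr inbounds i32* %base, i64 2
//   %c = icmp ult i32* %p, %q
// Both pointers map to (%base, 4) and (%base, 8) in ConstantOffsetPtrs, so
// the icmp folds to true and is free after inlining.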
bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}
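// Editorial illustration: subtracting two pointer-derived values that map
// to (%base, 12) and (%base, 4) in ConstantOffsetPtrs folds to the constant
// 8 through the common-base path above, and NumConstantPtrDiffs records the
// simplification.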
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
    SimplifiedValues[&I] = C;
    return true;
  }

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
  if (!C)
    C = SimplifiedValues.lookup(I.getAggregateOperand());
  if (C) {
    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}
bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
  if (!AggC)
    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
  if (!InsertedC)
    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
  if (AggC && InsertedC) {
    SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
                                                        I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}
/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}
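// Editorial illustration (hypothetical call site): a call such as
// `call double @llvm.sqrt.f64(double %x)` where %x has simplified to 4.0
// passes canConstantFoldCallTo, so ConstantFoldCall folds the whole call
// site to the constant 2.0 and it becomes free.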
bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() &&
      cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
  }

  return Base::visitCallSite(CS);
}
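// Editorial arithmetic note on the indirect-call bonus above (illustrative
// numbers): if InlineConstants::IndirectCallThreshold were 100 and the
// nested analysis reported a cost of 30, the subtraction credits
// max(0, 100 - 30) == 70 to this call site; a nested cost at or above the
// threshold credits nothing.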
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}
bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Otherwise, we need to accumulate a cost proportional to the number of
  // distinct successor blocks. This fan-out in the CFG cannot be represented
  // for free even if we can represent the core switch as a jumptable that
  // takes a single instruction.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.
  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
  SuccessorBlocks.insert(SI.getDefaultDest());
  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
    SuccessorBlocks.insert(I.getCaseSuccessor());
  // Add cost corresponding to the number of distinct destinations. The first
  // we model as free because of fallthrough.
  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
  return false;
}
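// Editorial arithmetic note: a switch with a default and three cases that
// target three further distinct blocks has four distinct successors, so the
// accounting above adds (4 - 1) * InlineConstants::InstrCost; cases sharing
// a destination are counted once because SuccessorBlocks is a set.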
bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}
bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}
bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}
/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible-to-inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function regardless of
    // our ability to simplify them during inline to constants or dead code,
    // are actually used by the vector bonus heuristic. As long as that's true,
    // we have to special case debug intrinsics here to prevent differences in
    // inlining due to debug symbols. Eventually, the number of unsimplified
    // instructions shouldn't factor into the cost computation, but until then,
    // hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    // If visiting this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr)
      return false;

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
      return false;

    if (NumVectorInstructions > NumInstructions/2)
      VectorBonus = FiftyPercentVectorBonus;
    else if (NumVectorInstructions > NumInstructions/10)
      VectorBonus = TenPercentVectorBonus;
    else
      VectorBonus = 0;

    // Check if we've passed the threshold so we don't spin in huge basic
    // blocks that will never inline.
    if (Cost > (Threshold + VectorBonus))
      return false;
  }

  return true;
}
/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!DL || !V->getType()->isPointerTy())
    return nullptr;

  unsigned IntPtrWidth = DL->getPointerSizeInBits();
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        break;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->mayBeOverridden())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IntPtrTy = DL->getIntPtrType(V->getContext());
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
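// Editorial illustration: starting from a bitcast of an inbounds GEP of an
// alloca, the loop above walks V back to the alloca while Offset accumulates
// the GEP's constant byte offset, so the caller receives the alloca as the
// stripped base plus a ConstantInt holding the total offset.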
/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;
  Threshold += SingleBBBonus;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
  FiftyPercentVectorBonus = Threshold;
  TenPercentVectorBonus = Threshold / 2;

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (DL && CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = DL->getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }
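  // Editorial arithmetic note (illustrative values): a byval argument of
  // type [4 x i64] on a target with 64-bit pointers gives TypeSize == 256
  // and PointerSize == 64, so NumStores == (256 + 64 - 1) / 64 == 4 and the
  // credit is 2 * 4 * InlineConstants::InstrCost, one load and one store per
  // word copied.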
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or if the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this unless there is literally zero
  // cost.
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Threshold = 1;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
    Threshold = 1;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallSite Site(U);
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  // the ephemeral values multiple times (and they're completely determined by
  // the callee, so this is purely duplicate work).
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
                                      EphValues);

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, prioritizing for small iterations because we exit after
  // crossing our threshold, we use a small-size optimized SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > (Threshold + VectorBonus))
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Disallow inlining a blockaddress. A blockaddress only has defined
    // behavior for an indirect branch in the same function, and we do not
    // currently support inlining indirect branches. But, the inliner may not
    // see an indirect branch that ends up being dead code at a particular call
    // site. If the blockaddress escapes the function, e.g., via a global
    // variable, inlining may lead to an invalid cross-function reference.
    if (BB->hasAddressTaken())
      return false;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    if (!analyzeBlock(BB, EphValues)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
          HasIndirectBr)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would
      // increase the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    TerminatorInst *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  Threshold += VectorBonus;

  return Cost < Threshold;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Dump stats about this call's analysis.
void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
  DEBUG_PRINT_STAT(VectorBonus);
#undef DEBUG_PRINT_STAT
}
#endif
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                      true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                    true, true)

char InlineCostAnalysis::ID = 0;

InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
InlineCostAnalysis::~InlineCostAnalysis() {}

void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetTransformInfo>();
  CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
  TTI = &getAnalysis<TargetTransformInfo>();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  return false;
}

InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
  return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}
/// \brief Test whether two functions both have, or both lack, the given
/// attribute.
static bool attributeMatches(Function *F1, Function *F2,
                             Attribute::AttrKind Attr) {
  return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
}

/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
                                              Function *Callee) {
  return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
                                             int Threshold) {
  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
    return llvm::InlineCost::getNever();

  // Don't inline this call if the caller has the optnone attribute.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever();

  // Don't inline functions which can be redefined at link-time to mean
  // something else. Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
    return llvm::InlineCost::getNever();

  DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
                     << "...\n");

  CallAnalyzer CA(Callee->getDataLayout(), *TTI,
                  ACT, *Callee, Threshold);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
bool InlineCostAnalysis::isInlineViable(Function &F) {
  bool ReturnsTwice =
    F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                   Attribute::ReturnsTwice);
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Disallow inlining of functions which contain indirect branches or
    // blockaddresses.
    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
      return false;

    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
         ++II) {
      CallSite CS(II);
      if (!CS)
        continue;

      // Disallow recursive calls.
      if (&F == CS.getCalledFunction())
        return false;

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && CS.isCall() &&
          cast<CallInst>(CS.getInstruction())->canReturnTwice())