]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===- InstCombineCompares.cpp --------------------------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements the visitICmp and visitFCmp functions. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | #include "InstCombine.h" | |
85aaf69f SL |
15 | #include "llvm/ADT/APSInt.h" |
16 | #include "llvm/ADT/Statistic.h" | |
223e47cc LB |
17 | #include "llvm/Analysis/ConstantFolding.h" |
18 | #include "llvm/Analysis/InstructionSimplify.h" | |
19 | #include "llvm/Analysis/MemoryBuiltins.h" | |
1a4d82fc | 20 | #include "llvm/IR/ConstantRange.h" |
970d7e83 | 21 | #include "llvm/IR/DataLayout.h" |
1a4d82fc | 22 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
970d7e83 | 23 | #include "llvm/IR/IntrinsicInst.h" |
1a4d82fc | 24 | #include "llvm/IR/PatternMatch.h" |
85aaf69f SL |
25 | #include "llvm/Support/CommandLine.h" |
26 | #include "llvm/Support/Debug.h" | |
970d7e83 | 27 | #include "llvm/Target/TargetLibraryInfo.h" |
85aaf69f | 28 | |
223e47cc LB |
29 | using namespace llvm; |
30 | using namespace PatternMatch; | |
31 | ||
1a4d82fc JJ |
32 | #define DEBUG_TYPE "instcombine" |
33 | ||
85aaf69f SL |
34 | // How many times is a select replaced by one of its operands? |
35 | STATISTIC(NumSel, "Number of select opts"); | |
36 | ||
37 | // Initialization Routines | |
38 | ||
223e47cc LB |
39 | static ConstantInt *getOne(Constant *C) { |
40 | return ConstantInt::get(cast<IntegerType>(C->getType()), 1); | |
41 | } | |
42 | ||
223e47cc LB |
43 | static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { |
44 | return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); | |
45 | } | |
46 | ||
47 | static bool HasAddOverflow(ConstantInt *Result, | |
48 | ConstantInt *In1, ConstantInt *In2, | |
49 | bool IsSigned) { | |
50 | if (!IsSigned) | |
51 | return Result->getValue().ult(In1->getValue()); | |
52 | ||
53 | if (In2->isNegative()) | |
54 | return Result->getValue().sgt(In1->getValue()); | |
55 | return Result->getValue().slt(In1->getValue()); | |
56 | } | |
57 | ||
58 | /// AddWithOverflow - Compute Result = In1+In2, returning true if the result | |
59 | /// overflowed for this type. | |
60 | static bool AddWithOverflow(Constant *&Result, Constant *In1, | |
61 | Constant *In2, bool IsSigned = false) { | |
62 | Result = ConstantExpr::getAdd(In1, In2); | |
63 | ||
64 | if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { | |
65 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { | |
66 | Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); | |
67 | if (HasAddOverflow(ExtractElement(Result, Idx), | |
68 | ExtractElement(In1, Idx), | |
69 | ExtractElement(In2, Idx), | |
70 | IsSigned)) | |
71 | return true; | |
72 | } | |
73 | return false; | |
74 | } | |
75 | ||
76 | return HasAddOverflow(cast<ConstantInt>(Result), | |
77 | cast<ConstantInt>(In1), cast<ConstantInt>(In2), | |
78 | IsSigned); | |
79 | } | |
80 | ||
81 | static bool HasSubOverflow(ConstantInt *Result, | |
82 | ConstantInt *In1, ConstantInt *In2, | |
83 | bool IsSigned) { | |
84 | if (!IsSigned) | |
85 | return Result->getValue().ugt(In1->getValue()); | |
86 | ||
87 | if (In2->isNegative()) | |
88 | return Result->getValue().slt(In1->getValue()); | |
89 | ||
90 | return Result->getValue().sgt(In1->getValue()); | |
91 | } | |
92 | ||
93 | /// SubWithOverflow - Compute Result = In1-In2, returning true if the result | |
94 | /// overflowed for this type. | |
95 | static bool SubWithOverflow(Constant *&Result, Constant *In1, | |
96 | Constant *In2, bool IsSigned = false) { | |
97 | Result = ConstantExpr::getSub(In1, In2); | |
98 | ||
99 | if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { | |
100 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { | |
101 | Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); | |
102 | if (HasSubOverflow(ExtractElement(Result, Idx), | |
103 | ExtractElement(In1, Idx), | |
104 | ExtractElement(In2, Idx), | |
105 | IsSigned)) | |
106 | return true; | |
107 | } | |
108 | return false; | |
109 | } | |
110 | ||
111 | return HasSubOverflow(cast<ConstantInt>(Result), | |
112 | cast<ConstantInt>(In1), cast<ConstantInt>(In2), | |
113 | IsSigned); | |
114 | } | |
115 | ||
116 | /// isSignBitCheck - Given an exploded icmp instruction, return true if the | |
117 | /// comparison only checks the sign bit. If it only checks the sign bit, set | |
118 | /// TrueIfSigned if the result of the comparison is true when the input value is | |
119 | /// signed. | |
120 | static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, | |
121 | bool &TrueIfSigned) { | |
122 | switch (pred) { | |
123 | case ICmpInst::ICMP_SLT: // True if LHS s< 0 | |
124 | TrueIfSigned = true; | |
125 | return RHS->isZero(); | |
126 | case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 | |
127 | TrueIfSigned = true; | |
128 | return RHS->isAllOnesValue(); | |
129 | case ICmpInst::ICMP_SGT: // True if LHS s> -1 | |
130 | TrueIfSigned = false; | |
131 | return RHS->isAllOnesValue(); | |
132 | case ICmpInst::ICMP_UGT: | |
133 | // True if LHS u> RHS and RHS == high-bit-mask - 1 | |
134 | TrueIfSigned = true; | |
135 | return RHS->isMaxValue(true); | |
136 | case ICmpInst::ICMP_UGE: | |
137 | // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) | |
138 | TrueIfSigned = true; | |
139 | return RHS->getValue().isSignBit(); | |
140 | default: | |
141 | return false; | |
142 | } | |
143 | } | |
144 | ||
1a4d82fc JJ |
145 | /// Returns true if the exploded icmp can be expressed as a signed comparison |
146 | /// to zero and updates the predicate accordingly. | |
147 | /// The signedness of the comparison is preserved. | |
148 | static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { | |
149 | if (!ICmpInst::isSigned(pred)) | |
150 | return false; | |
151 | ||
152 | if (RHS->isZero()) | |
153 | return ICmpInst::isRelational(pred); | |
154 | ||
155 | if (RHS->isOne()) { | |
156 | if (pred == ICmpInst::ICMP_SLT) { | |
157 | pred = ICmpInst::ICMP_SLE; | |
158 | return true; | |
159 | } | |
160 | } else if (RHS->isAllOnesValue()) { | |
161 | if (pred == ICmpInst::ICMP_SGT) { | |
162 | pred = ICmpInst::ICMP_SGE; | |
163 | return true; | |
164 | } | |
165 | } | |
166 | ||
167 | return false; | |
168 | } | |
169 | ||
223e47cc LB |
170 | // isHighOnes - Return true if the constant is of the form 1+0+. |
171 | // This is the same as lowones(~X). | |
172 | static bool isHighOnes(const ConstantInt *CI) { | |
173 | return (~CI->getValue() + 1).isPowerOf2(); | |
174 | } | |
175 | ||
176 | /// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a | |
177 | /// set of known zero and one bits, compute the maximum and minimum values that | |
178 | /// could have the specified known zero and known one bits, returning them in | |
179 | /// min/max. | |
180 | static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, | |
181 | const APInt& KnownOne, | |
182 | APInt& Min, APInt& Max) { | |
183 | assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && | |
184 | KnownZero.getBitWidth() == Min.getBitWidth() && | |
185 | KnownZero.getBitWidth() == Max.getBitWidth() && | |
186 | "KnownZero, KnownOne and Min, Max must have equal bitwidth."); | |
187 | APInt UnknownBits = ~(KnownZero|KnownOne); | |
188 | ||
189 | // The minimum value is when all unknown bits are zeros, EXCEPT for the sign | |
190 | // bit if it is unknown. | |
191 | Min = KnownOne; | |
192 | Max = KnownOne|UnknownBits; | |
193 | ||
194 | if (UnknownBits.isNegative()) { // Sign bit is unknown | |
195 | Min.setBit(Min.getBitWidth()-1); | |
196 | Max.clearBit(Max.getBitWidth()-1); | |
197 | } | |
198 | } | |
199 | ||
200 | // ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and | |
201 | // a set of known zero and one bits, compute the maximum and minimum values that | |
202 | // could have the specified known zero and known one bits, returning them in | |
203 | // min/max. | |
204 | static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, | |
205 | const APInt &KnownOne, | |
206 | APInt &Min, APInt &Max) { | |
207 | assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && | |
208 | KnownZero.getBitWidth() == Min.getBitWidth() && | |
209 | KnownZero.getBitWidth() == Max.getBitWidth() && | |
210 | "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); | |
211 | APInt UnknownBits = ~(KnownZero|KnownOne); | |
212 | ||
213 | // The minimum value is when the unknown bits are all zeros. | |
214 | Min = KnownOne; | |
215 | // The maximum value is when the unknown bits are all ones. | |
216 | Max = KnownOne|UnknownBits; | |
217 | } | |
218 | ||
219 | ||
220 | ||
221 | /// FoldCmpLoadFromIndexedGlobal - Called we see this pattern: | |
222 | /// cmp pred (load (gep GV, ...)), cmpcst | |
223 | /// where GV is a global variable with a constant initializer. Try to simplify | |
224 | /// this into some simple computation that does not need the load. For example | |
225 | /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3". | |
226 | /// | |
227 | /// If AndCst is non-null, then the loaded value is masked with that constant | |
228 | /// before doing the comparison. This handles cases like "A[i]&4 == 0". | |
229 | Instruction *InstCombiner:: | |
230 | FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, | |
231 | CmpInst &ICI, ConstantInt *AndCst) { | |
232 | // We need TD information to know the pointer size unless this is inbounds. | |
1a4d82fc JJ |
233 | if (!GEP->isInBounds() && !DL) |
234 | return nullptr; | |
223e47cc LB |
235 | |
236 | Constant *Init = GV->getInitializer(); | |
237 | if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) | |
1a4d82fc JJ |
238 | return nullptr; |
239 | ||
223e47cc | 240 | uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); |
1a4d82fc | 241 | if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays. |
223e47cc LB |
242 | |
243 | // There are many forms of this optimization we can handle, for now, just do | |
244 | // the simple index into a single-dimensional array. | |
245 | // | |
246 | // Require: GEP GV, 0, i {{, constant indices}} | |
247 | if (GEP->getNumOperands() < 3 || | |
248 | !isa<ConstantInt>(GEP->getOperand(1)) || | |
249 | !cast<ConstantInt>(GEP->getOperand(1))->isZero() || | |
250 | isa<Constant>(GEP->getOperand(2))) | |
1a4d82fc | 251 | return nullptr; |
223e47cc LB |
252 | |
253 | // Check that indices after the variable are constants and in-range for the | |
254 | // type they index. Collect the indices. This is typically for arrays of | |
255 | // structs. | |
256 | SmallVector<unsigned, 4> LaterIndices; | |
257 | ||
258 | Type *EltTy = Init->getType()->getArrayElementType(); | |
259 | for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { | |
260 | ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); | |
1a4d82fc | 261 | if (!Idx) return nullptr; // Variable index. |
223e47cc LB |
262 | |
263 | uint64_t IdxVal = Idx->getZExtValue(); | |
1a4d82fc | 264 | if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. |
223e47cc LB |
265 | |
266 | if (StructType *STy = dyn_cast<StructType>(EltTy)) | |
267 | EltTy = STy->getElementType(IdxVal); | |
268 | else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { | |
1a4d82fc | 269 | if (IdxVal >= ATy->getNumElements()) return nullptr; |
223e47cc LB |
270 | EltTy = ATy->getElementType(); |
271 | } else { | |
1a4d82fc | 272 | return nullptr; // Unknown type. |
223e47cc LB |
273 | } |
274 | ||
275 | LaterIndices.push_back(IdxVal); | |
276 | } | |
277 | ||
278 | enum { Overdefined = -3, Undefined = -2 }; | |
279 | ||
280 | // Variables for our state machines. | |
281 | ||
282 | // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form | |
283 | // "i == 47 | i == 87", where 47 is the first index the condition is true for, | |
284 | // and 87 is the second (and last) index. FirstTrueElement is -2 when | |
285 | // undefined, otherwise set to the first true element. SecondTrueElement is | |
286 | // -2 when undefined, -3 when overdefined and >= 0 when that index is true. | |
287 | int FirstTrueElement = Undefined, SecondTrueElement = Undefined; | |
288 | ||
289 | // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the | |
290 | // form "i != 47 & i != 87". Same state transitions as for true elements. | |
291 | int FirstFalseElement = Undefined, SecondFalseElement = Undefined; | |
292 | ||
293 | /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these | |
294 | /// define a state machine that triggers for ranges of values that the index | |
295 | /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. | |
296 | /// This is -2 when undefined, -3 when overdefined, and otherwise the last | |
297 | /// index in the range (inclusive). We use -2 for undefined here because we | |
298 | /// use relative comparisons and don't want 0-1 to match -1. | |
299 | int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; | |
300 | ||
301 | // MagicBitvector - This is a magic bitvector where we set a bit if the | |
302 | // comparison is true for element 'i'. If there are 64 elements or less in | |
303 | // the array, this will fully represent all the comparison results. | |
304 | uint64_t MagicBitvector = 0; | |
305 | ||
306 | ||
307 | // Scan the array and see if one of our patterns matches. | |
308 | Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); | |
309 | for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { | |
310 | Constant *Elt = Init->getAggregateElement(i); | |
1a4d82fc | 311 | if (!Elt) return nullptr; |
223e47cc LB |
312 | |
313 | // If this is indexing an array of structures, get the structure element. | |
314 | if (!LaterIndices.empty()) | |
315 | Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); | |
316 | ||
317 | // If the element is masked, handle it. | |
318 | if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); | |
319 | ||
320 | // Find out if the comparison would be true or false for the i'th element. | |
321 | Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, | |
1a4d82fc | 322 | CompareRHS, DL, TLI); |
223e47cc LB |
323 | // If the result is undef for this element, ignore it. |
324 | if (isa<UndefValue>(C)) { | |
325 | // Extend range state machines to cover this element in case there is an | |
326 | // undef in the middle of the range. | |
327 | if (TrueRangeEnd == (int)i-1) | |
328 | TrueRangeEnd = i; | |
329 | if (FalseRangeEnd == (int)i-1) | |
330 | FalseRangeEnd = i; | |
331 | continue; | |
332 | } | |
333 | ||
334 | // If we can't compute the result for any of the elements, we have to give | |
335 | // up evaluating the entire conditional. | |
1a4d82fc | 336 | if (!isa<ConstantInt>(C)) return nullptr; |
223e47cc LB |
337 | |
338 | // Otherwise, we know if the comparison is true or false for this element, | |
339 | // update our state machines. | |
340 | bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); | |
341 | ||
342 | // State machine for single/double/range index comparison. | |
343 | if (IsTrueForElt) { | |
344 | // Update the TrueElement state machine. | |
345 | if (FirstTrueElement == Undefined) | |
346 | FirstTrueElement = TrueRangeEnd = i; // First true element. | |
347 | else { | |
348 | // Update double-compare state machine. | |
349 | if (SecondTrueElement == Undefined) | |
350 | SecondTrueElement = i; | |
351 | else | |
352 | SecondTrueElement = Overdefined; | |
353 | ||
354 | // Update range state machine. | |
355 | if (TrueRangeEnd == (int)i-1) | |
356 | TrueRangeEnd = i; | |
357 | else | |
358 | TrueRangeEnd = Overdefined; | |
359 | } | |
360 | } else { | |
361 | // Update the FalseElement state machine. | |
362 | if (FirstFalseElement == Undefined) | |
363 | FirstFalseElement = FalseRangeEnd = i; // First false element. | |
364 | else { | |
365 | // Update double-compare state machine. | |
366 | if (SecondFalseElement == Undefined) | |
367 | SecondFalseElement = i; | |
368 | else | |
369 | SecondFalseElement = Overdefined; | |
370 | ||
371 | // Update range state machine. | |
372 | if (FalseRangeEnd == (int)i-1) | |
373 | FalseRangeEnd = i; | |
374 | else | |
375 | FalseRangeEnd = Overdefined; | |
376 | } | |
377 | } | |
378 | ||
379 | ||
380 | // If this element is in range, update our magic bitvector. | |
381 | if (i < 64 && IsTrueForElt) | |
382 | MagicBitvector |= 1ULL << i; | |
383 | ||
384 | // If all of our states become overdefined, bail out early. Since the | |
385 | // predicate is expensive, only check it every 8 elements. This is only | |
386 | // really useful for really huge arrays. | |
387 | if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && | |
388 | SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && | |
389 | FalseRangeEnd == Overdefined) | |
1a4d82fc | 390 | return nullptr; |
223e47cc LB |
391 | } |
392 | ||
393 | // Now that we've scanned the entire array, emit our new comparison(s). We | |
394 | // order the state machines in complexity of the generated code. | |
395 | Value *Idx = GEP->getOperand(2); | |
396 | ||
397 | // If the index is larger than the pointer size of the target, truncate the | |
398 | // index down like the GEP would do implicitly. We don't have to do this for | |
399 | // an inbounds GEP because the index can't be out of range. | |
1a4d82fc JJ |
400 | if (!GEP->isInBounds()) { |
401 | Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); | |
402 | unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); | |
403 | if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) | |
404 | Idx = Builder->CreateTrunc(Idx, IntPtrTy); | |
405 | } | |
223e47cc LB |
406 | |
407 | // If the comparison is only true for one or two elements, emit direct | |
408 | // comparisons. | |
409 | if (SecondTrueElement != Overdefined) { | |
410 | // None true -> false. | |
411 | if (FirstTrueElement == Undefined) | |
1a4d82fc | 412 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc LB |
413 | |
414 | Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); | |
415 | ||
416 | // True for one element -> 'i == 47'. | |
417 | if (SecondTrueElement == Undefined) | |
418 | return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); | |
419 | ||
420 | // True for two elements -> 'i == 47 | i == 72'. | |
421 | Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); | |
422 | Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); | |
423 | Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); | |
424 | return BinaryOperator::CreateOr(C1, C2); | |
425 | } | |
426 | ||
427 | // If the comparison is only false for one or two elements, emit direct | |
428 | // comparisons. | |
429 | if (SecondFalseElement != Overdefined) { | |
430 | // None false -> true. | |
431 | if (FirstFalseElement == Undefined) | |
1a4d82fc | 432 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
433 | |
434 | Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); | |
435 | ||
436 | // False for one element -> 'i != 47'. | |
437 | if (SecondFalseElement == Undefined) | |
438 | return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); | |
439 | ||
440 | // False for two elements -> 'i != 47 & i != 72'. | |
441 | Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); | |
442 | Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); | |
443 | Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); | |
444 | return BinaryOperator::CreateAnd(C1, C2); | |
445 | } | |
446 | ||
447 | // If the comparison can be replaced with a range comparison for the elements | |
448 | // where it is true, emit the range check. | |
449 | if (TrueRangeEnd != Overdefined) { | |
450 | assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); | |
451 | ||
452 | // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). | |
453 | if (FirstTrueElement) { | |
454 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); | |
455 | Idx = Builder->CreateAdd(Idx, Offs); | |
456 | } | |
457 | ||
458 | Value *End = ConstantInt::get(Idx->getType(), | |
459 | TrueRangeEnd-FirstTrueElement+1); | |
460 | return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); | |
461 | } | |
462 | ||
463 | // False range check. | |
464 | if (FalseRangeEnd != Overdefined) { | |
465 | assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); | |
466 | // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). | |
467 | if (FirstFalseElement) { | |
468 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); | |
469 | Idx = Builder->CreateAdd(Idx, Offs); | |
470 | } | |
471 | ||
472 | Value *End = ConstantInt::get(Idx->getType(), | |
473 | FalseRangeEnd-FirstFalseElement); | |
474 | return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); | |
475 | } | |
476 | ||
477 | ||
1a4d82fc | 478 | // If a magic bitvector captures the entire comparison state |
223e47cc LB |
479 | // of this load, replace it with computation that does: |
480 | // ((magic_cst >> i) & 1) != 0 | |
1a4d82fc JJ |
481 | { |
482 | Type *Ty = nullptr; | |
483 | ||
484 | // Look for an appropriate type: | |
485 | // - The type of Idx if the magic fits | |
486 | // - The smallest fitting legal type if we have a DataLayout | |
487 | // - Default to i32 | |
488 | if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) | |
489 | Ty = Idx->getType(); | |
490 | else if (DL) | |
491 | Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); | |
492 | else if (ArrayElementCount <= 32) | |
223e47cc | 493 | Ty = Type::getInt32Ty(Init->getContext()); |
1a4d82fc JJ |
494 | |
495 | if (Ty) { | |
496 | Value *V = Builder->CreateIntCast(Idx, Ty, false); | |
497 | V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); | |
498 | V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); | |
499 | return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); | |
500 | } | |
223e47cc LB |
501 | } |
502 | ||
1a4d82fc | 503 | return nullptr; |
223e47cc LB |
504 | } |
505 | ||
506 | ||
507 | /// EvaluateGEPOffsetExpression - Return a value that can be used to compare | |
508 | /// the *offset* implied by a GEP to zero. For example, if we have &A[i], we | |
509 | /// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can | |
510 | /// be complex, and scales are involved. The above expression would also be | |
511 | /// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). | |
512 | /// This later form is less amenable to optimization though, and we are allowed | |
513 | /// to generate the first by knowing that pointer arithmetic doesn't overflow. | |
514 | /// | |
515 | /// If we can't emit an optimized form for this expression, this returns null. | |
516 | /// | |
517 | static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { | |
1a4d82fc | 518 | const DataLayout &DL = *IC.getDataLayout(); |
223e47cc LB |
519 | gep_type_iterator GTI = gep_type_begin(GEP); |
520 | ||
521 | // Check to see if this gep only has a single variable index. If so, and if | |
522 | // any constant indices are a multiple of its scale, then we can compute this | |
523 | // in terms of the scale of the variable index. For example, if the GEP | |
524 | // implies an offset of "12 + i*4", then we can codegen this as "3 + i", | |
525 | // because the expression will cross zero at the same point. | |
526 | unsigned i, e = GEP->getNumOperands(); | |
527 | int64_t Offset = 0; | |
528 | for (i = 1; i != e; ++i, ++GTI) { | |
529 | if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { | |
530 | // Compute the aggregate offset of constant indices. | |
531 | if (CI->isZero()) continue; | |
532 | ||
533 | // Handle a struct index, which adds its field offset to the pointer. | |
534 | if (StructType *STy = dyn_cast<StructType>(*GTI)) { | |
1a4d82fc | 535 | Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); |
223e47cc | 536 | } else { |
1a4d82fc | 537 | uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); |
223e47cc LB |
538 | Offset += Size*CI->getSExtValue(); |
539 | } | |
540 | } else { | |
541 | // Found our variable index. | |
542 | break; | |
543 | } | |
544 | } | |
545 | ||
546 | // If there are no variable indices, we must have a constant offset, just | |
547 | // evaluate it the general way. | |
1a4d82fc | 548 | if (i == e) return nullptr; |
223e47cc LB |
549 | |
550 | Value *VariableIdx = GEP->getOperand(i); | |
551 | // Determine the scale factor of the variable element. For example, this is | |
552 | // 4 if the variable index is into an array of i32. | |
1a4d82fc | 553 | uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType()); |
223e47cc LB |
554 | |
555 | // Verify that there are no other variable indices. If so, emit the hard way. | |
556 | for (++i, ++GTI; i != e; ++i, ++GTI) { | |
557 | ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); | |
1a4d82fc | 558 | if (!CI) return nullptr; |
223e47cc LB |
559 | |
560 | // Compute the aggregate offset of constant indices. | |
561 | if (CI->isZero()) continue; | |
562 | ||
563 | // Handle a struct index, which adds its field offset to the pointer. | |
564 | if (StructType *STy = dyn_cast<StructType>(*GTI)) { | |
1a4d82fc | 565 | Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); |
223e47cc | 566 | } else { |
1a4d82fc | 567 | uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); |
223e47cc LB |
568 | Offset += Size*CI->getSExtValue(); |
569 | } | |
570 | } | |
571 | ||
1a4d82fc JJ |
572 | |
573 | ||
223e47cc LB |
574 | // Okay, we know we have a single variable index, which must be a |
575 | // pointer/array/vector index. If there is no offset, life is simple, return | |
576 | // the index. | |
1a4d82fc JJ |
577 | Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType()); |
578 | unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); | |
223e47cc LB |
579 | if (Offset == 0) { |
580 | // Cast to intptrty in case a truncation occurs. If an extension is needed, | |
581 | // we don't need to bother extending: the extension won't affect where the | |
582 | // computation crosses zero. | |
583 | if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { | |
223e47cc LB |
584 | VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); |
585 | } | |
586 | return VariableIdx; | |
587 | } | |
588 | ||
589 | // Otherwise, there is an index. The computation we will do will be modulo | |
590 | // the pointer size, so get it. | |
591 | uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); | |
592 | ||
593 | Offset &= PtrSizeMask; | |
594 | VariableScale &= PtrSizeMask; | |
595 | ||
596 | // To do this transformation, any constant index must be a multiple of the | |
597 | // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", | |
598 | // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a | |
599 | // multiple of the variable scale. | |
600 | int64_t NewOffs = Offset / (int64_t)VariableScale; | |
601 | if (Offset != NewOffs*(int64_t)VariableScale) | |
1a4d82fc | 602 | return nullptr; |
223e47cc LB |
603 | |
604 | // Okay, we can do this evaluation. Start by converting the index to intptr. | |
223e47cc LB |
605 | if (VariableIdx->getType() != IntPtrTy) |
606 | VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, | |
607 | true /*Signed*/); | |
608 | Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); | |
609 | return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); | |
610 | } | |
611 | ||
612 | /// FoldGEPICmp - Fold comparisons between a GEP instruction and something | |
613 | /// else. At this point we know that the GEP is on the LHS of the comparison. | |
614 | Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, | |
615 | ICmpInst::Predicate Cond, | |
616 | Instruction &I) { | |
617 | // Don't transform signed compares of GEPs into index compares. Even if the | |
618 | // GEP is inbounds, the final add of the base pointer can have signed overflow | |
619 | // and would change the result of the icmp. | |
620 | // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be | |
621 | // the maximum signed value for the pointer type. | |
622 | if (ICmpInst::isSigned(Cond)) | |
1a4d82fc | 623 | return nullptr; |
223e47cc | 624 | |
1a4d82fc JJ |
625 | // Look through bitcasts and addrspacecasts. We do not however want to remove |
626 | // 0 GEPs. | |
627 | if (!isa<GetElementPtrInst>(RHS)) | |
628 | RHS = RHS->stripPointerCasts(); | |
223e47cc LB |
629 | |
630 | Value *PtrBase = GEPLHS->getOperand(0); | |
1a4d82fc | 631 | if (DL && PtrBase == RHS && GEPLHS->isInBounds()) { |
223e47cc LB |
632 | // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). |
633 | // This transformation (ignoring the base and scales) is valid because we | |
634 | // know pointers can't overflow since the gep is inbounds. See if we can | |
635 | // output an optimized form. | |
636 | Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); | |
637 | ||
638 | // If not, synthesize the offset the hard way. | |
1a4d82fc | 639 | if (!Offset) |
223e47cc LB |
640 | Offset = EmitGEPOffset(GEPLHS); |
641 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, | |
642 | Constant::getNullValue(Offset->getType())); | |
643 | } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { | |
644 | // If the base pointers are different, but the indices are the same, just | |
645 | // compare the base pointer. | |
646 | if (PtrBase != GEPRHS->getOperand(0)) { | |
647 | bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); | |
648 | IndicesTheSame &= GEPLHS->getOperand(0)->getType() == | |
649 | GEPRHS->getOperand(0)->getType(); | |
650 | if (IndicesTheSame) | |
651 | for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) | |
652 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { | |
653 | IndicesTheSame = false; | |
654 | break; | |
655 | } | |
656 | ||
657 | // If all indices are the same, just compare the base pointers. | |
658 | if (IndicesTheSame) | |
1a4d82fc | 659 | return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0)); |
223e47cc LB |
660 | |
661 | // If we're comparing GEPs with two base pointers that only differ in type | |
662 | // and both GEPs have only constant indices or just one use, then fold | |
663 | // the compare with the adjusted indices. | |
1a4d82fc | 664 | if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() && |
223e47cc LB |
665 | (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && |
666 | (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && | |
667 | PtrBase->stripPointerCasts() == | |
668 | GEPRHS->getOperand(0)->stripPointerCasts()) { | |
1a4d82fc JJ |
669 | Value *LOffset = EmitGEPOffset(GEPLHS); |
670 | Value *ROffset = EmitGEPOffset(GEPRHS); | |
671 | ||
672 | // If we looked through an addrspacecast between different sized address | |
673 | // spaces, the LHS and RHS pointers are different sized | |
674 | // integers. Truncate to the smaller one. | |
675 | Type *LHSIndexTy = LOffset->getType(); | |
676 | Type *RHSIndexTy = ROffset->getType(); | |
677 | if (LHSIndexTy != RHSIndexTy) { | |
678 | if (LHSIndexTy->getPrimitiveSizeInBits() < | |
679 | RHSIndexTy->getPrimitiveSizeInBits()) { | |
680 | ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); | |
681 | } else | |
682 | LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); | |
683 | } | |
684 | ||
223e47cc | 685 | Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), |
1a4d82fc | 686 | LOffset, ROffset); |
223e47cc LB |
687 | return ReplaceInstUsesWith(I, Cmp); |
688 | } | |
689 | ||
690 | // Otherwise, the base pointers are different and the indices are | |
691 | // different, bail out. | |
1a4d82fc | 692 | return nullptr; |
223e47cc LB |
693 | } |
694 | ||
695 | // If one of the GEPs has all zero indices, recurse. | |
1a4d82fc | 696 | if (GEPLHS->hasAllZeroIndices()) |
223e47cc | 697 | return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), |
1a4d82fc | 698 | ICmpInst::getSwappedPredicate(Cond), I); |
223e47cc LB |
699 | |
700 | // If the other GEP has all zero indices, recurse. | |
1a4d82fc | 701 | if (GEPRHS->hasAllZeroIndices()) |
223e47cc LB |
702 | return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); |
703 | ||
704 | bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); | |
705 | if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { | |
706 | // If the GEPs only differ by one index, compare it. | |
707 | unsigned NumDifferences = 0; // Keep track of # differences. | |
708 | unsigned DiffOperand = 0; // The operand that differs. | |
709 | for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) | |
710 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { | |
711 | if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != | |
712 | GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { | |
713 | // Irreconcilable differences. | |
714 | NumDifferences = 2; | |
715 | break; | |
716 | } else { | |
717 | if (NumDifferences++) break; | |
718 | DiffOperand = i; | |
719 | } | |
720 | } | |
721 | ||
722 | if (NumDifferences == 0) // SAME GEP? | |
723 | return ReplaceInstUsesWith(I, // No comparison is needed here. | |
1a4d82fc | 724 | Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); |
223e47cc LB |
725 | |
726 | else if (NumDifferences == 1 && GEPsInBounds) { | |
727 | Value *LHSV = GEPLHS->getOperand(DiffOperand); | |
728 | Value *RHSV = GEPRHS->getOperand(DiffOperand); | |
729 | // Make sure we do a signed comparison here. | |
730 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); | |
731 | } | |
732 | } | |
733 | ||
734 | // Only lower this if the icmp is the only user of the GEP or if we expect | |
735 | // the result to fold to a constant! | |
1a4d82fc | 736 | if (DL && |
223e47cc LB |
737 | GEPsInBounds && |
738 | (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && | |
739 | (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { | |
740 | // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) | |
741 | Value *L = EmitGEPOffset(GEPLHS); | |
742 | Value *R = EmitGEPOffset(GEPRHS); | |
743 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); | |
744 | } | |
745 | } | |
1a4d82fc | 746 | return nullptr; |
223e47cc LB |
747 | } |
748 | ||
749 | /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". | |
1a4d82fc | 750 | Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, |
223e47cc | 751 | Value *X, ConstantInt *CI, |
1a4d82fc | 752 | ICmpInst::Predicate Pred) { |
223e47cc LB |
753 | // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, |
754 | // so the values can never be equal. Similarly for all other "or equals" | |
755 | // operators. | |
756 | ||
757 | // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 | |
758 | // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 | |
759 | // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 | |
760 | if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { | |
761 | Value *R = | |
762 | ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); | |
763 | return new ICmpInst(ICmpInst::ICMP_UGT, X, R); | |
764 | } | |
765 | ||
766 | // (X+1) >u X --> X <u (0-1) --> X != 255 | |
767 | // (X+2) >u X --> X <u (0-2) --> X <u 254 | |
768 | // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 | |
769 | if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) | |
770 | return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); | |
771 | ||
772 | unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); | |
773 | ConstantInt *SMax = ConstantInt::get(X->getContext(), | |
774 | APInt::getSignedMaxValue(BitWidth)); | |
775 | ||
776 | // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 | |
777 | // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 | |
778 | // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 | |
779 | // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 | |
780 | // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 | |
781 | // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 | |
782 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) | |
783 | return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); | |
784 | ||
785 | // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 | |
786 | // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 | |
787 | // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 | |
788 | // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 | |
789 | // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 | |
790 | // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 | |
791 | ||
792 | assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); | |
1a4d82fc | 793 | Constant *C = Builder->getInt(CI->getValue()-1); |
223e47cc LB |
794 | return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); |
795 | } | |
796 | ||
797 | /// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS | |
798 | /// and CmpRHS are both known to be integer constants. | |
799 | Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, | |
800 | ConstantInt *DivRHS) { | |
801 | ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); | |
802 | const APInt &CmpRHSV = CmpRHS->getValue(); | |
803 | ||
804 | // FIXME: If the operand types don't match the type of the divide | |
805 | // then don't attempt this transform. The code below doesn't have the | |
806 | // logic to deal with a signed divide and an unsigned compare (and | |
807 | // vice versa). This is because (x /s C1) <s C2 produces different | |
808 | // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even | |
809 | // (x /u C1) <u C2. Simply casting the operands and result won't | |
810 | // work. :( The if statement below tests that condition and bails | |
811 | // if it finds it. | |
812 | bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; | |
813 | if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) | |
1a4d82fc | 814 | return nullptr; |
223e47cc | 815 | if (DivRHS->isZero()) |
1a4d82fc | 816 | return nullptr; // The ProdOV computation fails on divide by zero. |
223e47cc | 817 | if (DivIsSigned && DivRHS->isAllOnesValue()) |
1a4d82fc | 818 | return nullptr; // The overflow computation also screws up here |
223e47cc LB |
819 | if (DivRHS->isOne()) { |
820 | // This eliminates some funny cases with INT_MIN. | |
821 | ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. | |
822 | return &ICI; | |
823 | } | |
824 | ||
825 | // Compute Prod = CI * DivRHS. We are essentially solving an equation | |
826 | // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and | |
827 | // C2 (CI). By solving for X we can turn this into a range check | |
828 | // instead of computing a divide. | |
829 | Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); | |
830 | ||
831 | // Determine if the product overflows by seeing if the product is | |
832 | // not equal to the divide. Make sure we do the same kind of divide | |
833 | // as in the LHS instruction that we're folding. | |
834 | bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : | |
835 | ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; | |
836 | ||
837 | // Get the ICmp opcode | |
838 | ICmpInst::Predicate Pred = ICI.getPredicate(); | |
839 | ||
840 | /// If the division is known to be exact, then there is no remainder from the | |
841 | /// divide, so the covered range size is unit, otherwise it is the divisor. | |
842 | ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; | |
843 | ||
844 | // Figure out the interval that is being checked. For example, a comparison | |
845 | // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). | |
846 | // Compute this interval based on the constants involved and the signedness of | |
847 | // the compare/divide. This computes a half-open interval, keeping track of | |
848 | // whether either value in the interval overflows. After analysis each | |
849 | // overflow variable is set to 0 if it's corresponding bound variable is valid | |
850 | // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. | |
851 | int LoOverflow = 0, HiOverflow = 0; | |
1a4d82fc | 852 | Constant *LoBound = nullptr, *HiBound = nullptr; |
223e47cc LB |
853 | |
854 | if (!DivIsSigned) { // udiv | |
855 | // e.g. X/5 op 3 --> [15, 20) | |
856 | LoBound = Prod; | |
857 | HiOverflow = LoOverflow = ProdOV; | |
858 | if (!HiOverflow) { | |
859 | // If this is not an exact divide, then many values in the range collapse | |
860 | // to the same result value. | |
861 | HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); | |
862 | } | |
863 | ||
864 | } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. | |
865 | if (CmpRHSV == 0) { // (X / pos) op 0 | |
866 | // Can't overflow. e.g. X/2 op 0 --> [-1, 2) | |
867 | LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); | |
868 | HiBound = RangeSize; | |
869 | } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos | |
870 | LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) | |
871 | HiOverflow = LoOverflow = ProdOV; | |
872 | if (!HiOverflow) | |
873 | HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); | |
874 | } else { // (X / pos) op neg | |
875 | // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) | |
876 | HiBound = AddOne(Prod); | |
877 | LoOverflow = HiOverflow = ProdOV ? -1 : 0; | |
878 | if (!LoOverflow) { | |
879 | ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); | |
880 | LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; | |
881 | } | |
882 | } | |
883 | } else if (DivRHS->isNegative()) { // Divisor is < 0. | |
884 | if (DivI->isExact()) | |
885 | RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); | |
886 | if (CmpRHSV == 0) { // (X / neg) op 0 | |
887 | // e.g. X/-5 op 0 --> [-4, 5) | |
888 | LoBound = AddOne(RangeSize); | |
889 | HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); | |
890 | if (HiBound == DivRHS) { // -INTMIN = INTMIN | |
891 | HiOverflow = 1; // [INTMIN+1, overflow) | |
1a4d82fc | 892 | HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN |
223e47cc LB |
893 | } |
894 | } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos | |
895 | // e.g. X/-5 op 3 --> [-19, -14) | |
896 | HiBound = AddOne(Prod); | |
897 | HiOverflow = LoOverflow = ProdOV ? -1 : 0; | |
898 | if (!LoOverflow) | |
899 | LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; | |
900 | } else { // (X / neg) op neg | |
901 | LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) | |
902 | LoOverflow = HiOverflow = ProdOV; | |
903 | if (!HiOverflow) | |
904 | HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); | |
905 | } | |
906 | ||
907 | // Dividing by a negative swaps the condition. LT <-> GT | |
908 | Pred = ICmpInst::getSwappedPredicate(Pred); | |
909 | } | |
910 | ||
911 | Value *X = DivI->getOperand(0); | |
912 | switch (Pred) { | |
913 | default: llvm_unreachable("Unhandled icmp opcode!"); | |
914 | case ICmpInst::ICMP_EQ: | |
915 | if (LoOverflow && HiOverflow) | |
1a4d82fc | 916 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc LB |
917 | if (HiOverflow) |
918 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : | |
919 | ICmpInst::ICMP_UGE, X, LoBound); | |
920 | if (LoOverflow) | |
921 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : | |
922 | ICmpInst::ICMP_ULT, X, HiBound); | |
923 | return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, | |
924 | DivIsSigned, true)); | |
925 | case ICmpInst::ICMP_NE: | |
926 | if (LoOverflow && HiOverflow) | |
1a4d82fc | 927 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
928 | if (HiOverflow) |
929 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : | |
930 | ICmpInst::ICMP_ULT, X, LoBound); | |
931 | if (LoOverflow) | |
932 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : | |
933 | ICmpInst::ICMP_UGE, X, HiBound); | |
934 | return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, | |
935 | DivIsSigned, false)); | |
936 | case ICmpInst::ICMP_ULT: | |
937 | case ICmpInst::ICMP_SLT: | |
938 | if (LoOverflow == +1) // Low bound is greater than input range. | |
1a4d82fc | 939 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc | 940 | if (LoOverflow == -1) // Low bound is less than input range. |
1a4d82fc | 941 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc LB |
942 | return new ICmpInst(Pred, X, LoBound); |
943 | case ICmpInst::ICMP_UGT: | |
944 | case ICmpInst::ICMP_SGT: | |
945 | if (HiOverflow == +1) // High bound greater than input range. | |
1a4d82fc | 946 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc | 947 | if (HiOverflow == -1) // High bound less than input range. |
1a4d82fc | 948 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
949 | if (Pred == ICmpInst::ICMP_UGT) |
950 | return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); | |
951 | return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); | |
952 | } | |
953 | } | |
954 | ||
955 | /// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". | |
956 | Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, | |
957 | ConstantInt *ShAmt) { | |
958 | const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); | |
959 | ||
960 | // Check that the shift amount is in range. If not, don't perform | |
961 | // undefined shifts. When the shift is visited it will be | |
962 | // simplified. | |
963 | uint32_t TypeBits = CmpRHSV.getBitWidth(); | |
964 | uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); | |
965 | if (ShAmtVal >= TypeBits || ShAmtVal == 0) | |
1a4d82fc | 966 | return nullptr; |
223e47cc LB |
967 | |
968 | if (!ICI.isEquality()) { | |
969 | // If we have an unsigned comparison and an ashr, we can't simplify this. | |
970 | // Similarly for signed comparisons with lshr. | |
971 | if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) | |
1a4d82fc | 972 | return nullptr; |
223e47cc LB |
973 | |
974 | // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv | |
975 | // by a power of 2. Since we already have logic to simplify these, | |
976 | // transform to div and then simplify the resultant comparison. | |
977 | if (Shr->getOpcode() == Instruction::AShr && | |
978 | (!Shr->isExact() || ShAmtVal == TypeBits - 1)) | |
1a4d82fc | 979 | return nullptr; |
223e47cc LB |
980 | |
981 | // Revisit the shift (to delete it). | |
982 | Worklist.Add(Shr); | |
983 | ||
984 | Constant *DivCst = | |
985 | ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); | |
986 | ||
987 | Value *Tmp = | |
988 | Shr->getOpcode() == Instruction::AShr ? | |
989 | Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : | |
990 | Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); | |
991 | ||
992 | ICI.setOperand(0, Tmp); | |
993 | ||
994 | // If the builder folded the binop, just return it. | |
995 | BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); | |
1a4d82fc | 996 | if (!TheDiv) |
223e47cc LB |
997 | return &ICI; |
998 | ||
999 | // Otherwise, fold this div/compare. | |
1000 | assert(TheDiv->getOpcode() == Instruction::SDiv || | |
1001 | TheDiv->getOpcode() == Instruction::UDiv); | |
1002 | ||
1003 | Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); | |
1004 | assert(Res && "This div/cst should have folded!"); | |
1005 | return Res; | |
1006 | } | |
1007 | ||
1008 | ||
1009 | // If we are comparing against bits always shifted out, the | |
1010 | // comparison cannot succeed. | |
1011 | APInt Comp = CmpRHSV << ShAmtVal; | |
1a4d82fc | 1012 | ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp); |
223e47cc LB |
1013 | if (Shr->getOpcode() == Instruction::LShr) |
1014 | Comp = Comp.lshr(ShAmtVal); | |
1015 | else | |
1016 | Comp = Comp.ashr(ShAmtVal); | |
1017 | ||
1018 | if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. | |
1019 | bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; | |
1a4d82fc | 1020 | Constant *Cst = Builder->getInt1(IsICMP_NE); |
223e47cc LB |
1021 | return ReplaceInstUsesWith(ICI, Cst); |
1022 | } | |
1023 | ||
1024 | // Otherwise, check to see if the bits shifted out are known to be zero. | |
1025 | // If so, we can compare against the unshifted value: | |
1026 | // (X & 4) >> 1 == 2 --> (X & 4) == 4. | |
1027 | if (Shr->hasOneUse() && Shr->isExact()) | |
1028 | return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); | |
1029 | ||
1030 | if (Shr->hasOneUse()) { | |
1031 | // Otherwise strength reduce the shift into an and. | |
1032 | APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); | |
1a4d82fc | 1033 | Constant *Mask = Builder->getInt(Val); |
223e47cc LB |
1034 | |
1035 | Value *And = Builder->CreateAnd(Shr->getOperand(0), | |
1036 | Mask, Shr->getName()+".mask"); | |
1037 | return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); | |
1038 | } | |
1a4d82fc | 1039 | return nullptr; |
223e47cc LB |
1040 | } |
1041 | ||
1a4d82fc JJ |
1042 | /// FoldICmpCstShrCst - Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" -> |
1043 | /// (icmp eq/ne A, Log2(const2/const1)) -> | |
1044 | /// (icmp eq/ne A, Log2(const2) - Log2(const1)). | |
1045 | Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A, | |
1046 | ConstantInt *CI1, | |
1047 | ConstantInt *CI2) { | |
1048 | assert(I.isEquality() && "Cannot fold icmp gt/lt"); | |
1049 | ||
1050 | auto getConstant = [&I, this](bool IsTrue) { | |
1051 | if (I.getPredicate() == I.ICMP_NE) | |
1052 | IsTrue = !IsTrue; | |
1053 | return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue)); | |
1054 | }; | |
1055 | ||
1056 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { | |
1057 | if (I.getPredicate() == I.ICMP_NE) | |
1058 | Pred = CmpInst::getInversePredicate(Pred); | |
1059 | return new ICmpInst(Pred, LHS, RHS); | |
1060 | }; | |
1061 | ||
1062 | APInt AP1 = CI1->getValue(); | |
1063 | APInt AP2 = CI2->getValue(); | |
1064 | ||
85aaf69f SL |
1065 | // Don't bother doing any work for cases which InstSimplify handles. |
1066 | if (AP2 == 0) | |
1067 | return nullptr; | |
1068 | bool IsAShr = isa<AShrOperator>(Op); | |
1069 | if (IsAShr) { | |
1070 | if (AP2.isAllOnesValue()) | |
1071 | return nullptr; | |
1072 | if (AP2.isNegative() != AP1.isNegative()) | |
1073 | return nullptr; | |
1074 | if (AP2.sgt(AP1)) | |
1075 | return nullptr; | |
1076 | } | |
1a4d82fc | 1077 | |
85aaf69f | 1078 | if (!AP1) |
1a4d82fc JJ |
1079 | // 'A' must be large enough to shift out the highest set bit. |
1080 | return getICmp(I.ICMP_UGT, A, | |
1081 | ConstantInt::get(A->getType(), AP2.logBase2())); | |
1a4d82fc | 1082 | |
85aaf69f | 1083 | if (AP1 == AP2) |
1a4d82fc | 1084 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); |
1a4d82fc JJ |
1085 | |
1086 | // Get the distance between the highest bit that's set. | |
1087 | int Shift; | |
85aaf69f SL |
1088 | // Both the constants are negative, take their positive to calculate log. |
1089 | if (IsAShr && AP1.isNegative()) | |
1090 | // Get the ones' complement of AP2 and AP1 when computing the distance. | |
1091 | Shift = (~AP2).logBase2() - (~AP1).logBase2(); | |
1a4d82fc JJ |
1092 | else |
1093 | Shift = AP2.logBase2() - AP1.logBase2(); | |
1094 | ||
85aaf69f SL |
1095 | if (Shift > 0) { |
1096 | if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift)) | |
1097 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); | |
1098 | } | |
1099 | // Shifting const2 will never be equal to const1. | |
1100 | return getConstant(false); | |
1101 | } | |
1102 | ||
1103 | /// FoldICmpCstShlCst - Handle "(icmp eq/ne (shl const2, A), const1)" -> | |
1104 | /// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)). | |
1105 | Instruction *InstCombiner::FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A, | |
1106 | ConstantInt *CI1, | |
1107 | ConstantInt *CI2) { | |
1108 | assert(I.isEquality() && "Cannot fold icmp gt/lt"); | |
1109 | ||
1110 | auto getConstant = [&I, this](bool IsTrue) { | |
1111 | if (I.getPredicate() == I.ICMP_NE) | |
1112 | IsTrue = !IsTrue; | |
1113 | return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue)); | |
1114 | }; | |
1115 | ||
1116 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { | |
1117 | if (I.getPredicate() == I.ICMP_NE) | |
1118 | Pred = CmpInst::getInversePredicate(Pred); | |
1119 | return new ICmpInst(Pred, LHS, RHS); | |
1120 | }; | |
1121 | ||
1122 | APInt AP1 = CI1->getValue(); | |
1123 | APInt AP2 = CI2->getValue(); | |
1124 | ||
1125 | // Don't bother doing any work for cases which InstSimplify handles. | |
1126 | if (AP2 == 0) | |
1127 | return nullptr; | |
1128 | ||
1129 | unsigned AP2TrailingZeros = AP2.countTrailingZeros(); | |
1130 | ||
1131 | if (!AP1 && AP2TrailingZeros != 0) | |
1132 | return getICmp(I.ICMP_UGE, A, | |
1133 | ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros)); | |
1134 | ||
1135 | if (AP1 == AP2) | |
1136 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); | |
1137 | ||
1138 | // Get the distance between the lowest bits that are set. | |
1139 | int Shift = AP1.countTrailingZeros() - AP2TrailingZeros; | |
1140 | ||
1141 | if (Shift > 0 && AP2.shl(Shift) == AP1) | |
1a4d82fc JJ |
1142 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); |
1143 | ||
1144 | // Shifting const2 will never be equal to const1. | |
1145 | return getConstant(false); | |
1146 | } | |
223e47cc LB |
1147 | |
1148 | /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". | |
1149 | /// | |
1150 | Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, | |
1151 | Instruction *LHSI, | |
1152 | ConstantInt *RHS) { | |
1153 | const APInt &RHSV = RHS->getValue(); | |
1154 | ||
1155 | switch (LHSI->getOpcode()) { | |
1156 | case Instruction::Trunc: | |
1157 | if (ICI.isEquality() && LHSI->hasOneUse()) { | |
1158 | // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all | |
1159 | // of the high bits truncated out of x are known. | |
1160 | unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), | |
1161 | SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); | |
1162 | APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); | |
1a4d82fc | 1163 | computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI); |
223e47cc LB |
1164 | |
1165 | // If all the high bits are known, we can do this xform. | |
1166 | if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { | |
1167 | // Pull in the high bits from known-ones set. | |
1168 | APInt NewRHS = RHS->getValue().zext(SrcBits); | |
1169 | NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); | |
1170 | return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), | |
1a4d82fc | 1171 | Builder->getInt(NewRHS)); |
223e47cc LB |
1172 | } |
1173 | } | |
1174 | break; | |
1175 | ||
1a4d82fc JJ |
1176 | case Instruction::Xor: // (icmp pred (xor X, XorCst), CI) |
1177 | if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { | |
223e47cc LB |
1178 | // If this is a comparison that tests the signbit (X < 0) or (x > -1), |
1179 | // fold the xor. | |
1180 | if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || | |
1181 | (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { | |
1182 | Value *CompareVal = LHSI->getOperand(0); | |
1183 | ||
1a4d82fc | 1184 | // If the sign bit of the XorCst is not set, there is no change to |
223e47cc | 1185 | // the operation, just stop using the Xor. |
1a4d82fc | 1186 | if (!XorCst->isNegative()) { |
223e47cc LB |
1187 | ICI.setOperand(0, CompareVal); |
1188 | Worklist.Add(LHSI); | |
1189 | return &ICI; | |
1190 | } | |
1191 | ||
1192 | // Was the old condition true if the operand is positive? | |
1193 | bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; | |
1194 | ||
1195 | // If so, the new one isn't. | |
1196 | isTrueIfPositive ^= true; | |
1197 | ||
1198 | if (isTrueIfPositive) | |
1199 | return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, | |
1200 | SubOne(RHS)); | |
1201 | else | |
1202 | return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, | |
1203 | AddOne(RHS)); | |
1204 | } | |
1205 | ||
1206 | if (LHSI->hasOneUse()) { | |
1207 | // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) | |
1a4d82fc JJ |
1208 | if (!ICI.isEquality() && XorCst->getValue().isSignBit()) { |
1209 | const APInt &SignBit = XorCst->getValue(); | |
223e47cc LB |
1210 | ICmpInst::Predicate Pred = ICI.isSigned() |
1211 | ? ICI.getUnsignedPredicate() | |
1212 | : ICI.getSignedPredicate(); | |
1213 | return new ICmpInst(Pred, LHSI->getOperand(0), | |
1a4d82fc | 1214 | Builder->getInt(RHSV ^ SignBit)); |
223e47cc LB |
1215 | } |
1216 | ||
1217 | // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) | |
1a4d82fc JJ |
1218 | if (!ICI.isEquality() && XorCst->isMaxValue(true)) { |
1219 | const APInt &NotSignBit = XorCst->getValue(); | |
223e47cc LB |
1220 | ICmpInst::Predicate Pred = ICI.isSigned() |
1221 | ? ICI.getUnsignedPredicate() | |
1222 | : ICI.getSignedPredicate(); | |
1223 | Pred = ICI.getSwappedPredicate(Pred); | |
1224 | return new ICmpInst(Pred, LHSI->getOperand(0), | |
1a4d82fc | 1225 | Builder->getInt(RHSV ^ NotSignBit)); |
223e47cc LB |
1226 | } |
1227 | } | |
1a4d82fc JJ |
1228 | |
1229 | // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C) | |
1230 | // iff -C is a power of 2 | |
1231 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT && | |
1232 | XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2()) | |
1233 | return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst); | |
1234 | ||
1235 | // (icmp ult (xor X, C), -C) -> (icmp uge X, C) | |
1236 | // iff -C is a power of 2 | |
1237 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT && | |
1238 | XorCst->getValue() == -RHSV && RHSV.isPowerOf2()) | |
1239 | return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst); | |
223e47cc LB |
1240 | } |
1241 | break; | |
1a4d82fc | 1242 | case Instruction::And: // (icmp pred (and X, AndCst), RHS) |
223e47cc LB |
1243 | if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && |
1244 | LHSI->getOperand(0)->hasOneUse()) { | |
1a4d82fc | 1245 | ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1)); |
223e47cc LB |
1246 | |
1247 | // If the LHS is an AND of a truncating cast, we can widen the | |
1248 | // and/compare to be the input width without changing the value | |
1249 | // produced, eliminating a cast. | |
1250 | if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { | |
1251 | // We can do this transformation if either the AND constant does not | |
1252 | // have its sign bit set or if it is an equality comparison. | |
1253 | // Extending a relational comparison when we're checking the sign | |
1254 | // bit would not work. | |
1255 | if (ICI.isEquality() || | |
1a4d82fc | 1256 | (!AndCst->isNegative() && RHSV.isNonNegative())) { |
223e47cc LB |
1257 | Value *NewAnd = |
1258 | Builder->CreateAnd(Cast->getOperand(0), | |
1a4d82fc | 1259 | ConstantExpr::getZExt(AndCst, Cast->getSrcTy())); |
223e47cc LB |
1260 | NewAnd->takeName(LHSI); |
1261 | return new ICmpInst(ICI.getPredicate(), NewAnd, | |
1262 | ConstantExpr::getZExt(RHS, Cast->getSrcTy())); | |
1263 | } | |
1264 | } | |
1265 | ||
1266 | // If the LHS is an AND of a zext, and we have an equality compare, we can | |
1267 | // shrink the and/compare to the smaller type, eliminating the cast. | |
1268 | if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { | |
1269 | IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); | |
1270 | // Make sure we don't compare the upper bits, SimplifyDemandedBits | |
1271 | // should fold the icmp to true/false in that case. | |
1272 | if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { | |
1273 | Value *NewAnd = | |
1274 | Builder->CreateAnd(Cast->getOperand(0), | |
1a4d82fc | 1275 | ConstantExpr::getTrunc(AndCst, Ty)); |
223e47cc LB |
1276 | NewAnd->takeName(LHSI); |
1277 | return new ICmpInst(ICI.getPredicate(), NewAnd, | |
1278 | ConstantExpr::getTrunc(RHS, Ty)); | |
1279 | } | |
1280 | } | |
1281 | ||
1282 | // If this is: (X >> C1) & C2 != C3 (where any shift and any compare | |
1283 | // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This | |
1284 | // happens a LOT in code produced by the C front-end, for bitfield | |
1285 | // access. | |
1286 | BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); | |
1287 | if (Shift && !Shift->isShift()) | |
1a4d82fc | 1288 | Shift = nullptr; |
223e47cc LB |
1289 | |
1290 | ConstantInt *ShAmt; | |
1a4d82fc | 1291 | ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr; |
223e47cc | 1292 | |
1a4d82fc JJ |
1293 | // This seemingly simple opportunity to fold away a shift turns out to |
1294 | // be rather complicated. See PR17827 | |
1295 | // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details. | |
223e47cc | 1296 | if (ShAmt) { |
1a4d82fc JJ |
1297 | bool CanFold = false; |
1298 | unsigned ShiftOpcode = Shift->getOpcode(); | |
1299 | if (ShiftOpcode == Instruction::AShr) { | |
1300 | // There may be some constraints that make this possible, | |
1301 | // but nothing simple has been discovered yet. | |
1302 | CanFold = false; | |
1303 | } else if (ShiftOpcode == Instruction::Shl) { | |
1304 | // For a left shift, we can fold if the comparison is not signed. | |
1305 | // We can also fold a signed comparison if the mask value and | |
1306 | // comparison value are not negative. These constraints may not be | |
1307 | // obvious, but we can prove that they are correct using an SMT | |
1308 | // solver. | |
1309 | if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative())) | |
223e47cc | 1310 | CanFold = true; |
1a4d82fc JJ |
1311 | } else if (ShiftOpcode == Instruction::LShr) { |
1312 | // For a logical right shift, we can fold if the comparison is not | |
1313 | // signed. We can also fold a signed comparison if the shifted mask | |
1314 | // value and the shifted comparison value are not negative. | |
1315 | // These constraints may not be obvious, but we can prove that they | |
1316 | // are correct using an SMT solver. | |
1317 | if (!ICI.isSigned()) | |
1318 | CanFold = true; | |
1319 | else { | |
1320 | ConstantInt *ShiftedAndCst = | |
1321 | cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt)); | |
1322 | ConstantInt *ShiftedRHSCst = | |
1323 | cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt)); | |
1324 | ||
1325 | if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative()) | |
1326 | CanFold = true; | |
1327 | } | |
223e47cc LB |
1328 | } |
1329 | ||
1330 | if (CanFold) { | |
1331 | Constant *NewCst; | |
1a4d82fc | 1332 | if (ShiftOpcode == Instruction::Shl) |
223e47cc LB |
1333 | NewCst = ConstantExpr::getLShr(RHS, ShAmt); |
1334 | else | |
1335 | NewCst = ConstantExpr::getShl(RHS, ShAmt); | |
1336 | ||
1337 | // Check to see if we are shifting out any of the bits being | |
1338 | // compared. | |
1a4d82fc | 1339 | if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) { |
223e47cc LB |
1340 | // If we shifted bits out, the fold is not going to work out. |
1341 | // As a special case, check to see if this means that the | |
1342 | // result is always true or false now. | |
1343 | if (ICI.getPredicate() == ICmpInst::ICMP_EQ) | |
1a4d82fc | 1344 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc | 1345 | if (ICI.getPredicate() == ICmpInst::ICMP_NE) |
1a4d82fc | 1346 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
1347 | } else { |
1348 | ICI.setOperand(1, NewCst); | |
1a4d82fc JJ |
1349 | Constant *NewAndCst; |
1350 | if (ShiftOpcode == Instruction::Shl) | |
1351 | NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt); | |
223e47cc | 1352 | else |
1a4d82fc JJ |
1353 | NewAndCst = ConstantExpr::getShl(AndCst, ShAmt); |
1354 | LHSI->setOperand(1, NewAndCst); | |
223e47cc LB |
1355 | LHSI->setOperand(0, Shift->getOperand(0)); |
1356 | Worklist.Add(Shift); // Shift is dead. | |
1357 | return &ICI; | |
1358 | } | |
1359 | } | |
1360 | } | |
1361 | ||
1362 | // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is | |
1363 | // preferable because it allows the C<<Y expression to be hoisted out | |
1364 | // of a loop if Y is invariant and X is not. | |
1365 | if (Shift && Shift->hasOneUse() && RHSV == 0 && | |
1366 | ICI.isEquality() && !Shift->isArithmeticShift() && | |
1367 | !isa<Constant>(Shift->getOperand(0))) { | |
1368 | // Compute C << Y. | |
1369 | Value *NS; | |
1370 | if (Shift->getOpcode() == Instruction::LShr) { | |
1a4d82fc | 1371 | NS = Builder->CreateShl(AndCst, Shift->getOperand(1)); |
223e47cc LB |
1372 | } else { |
1373 | // Insert a logical shift. | |
1a4d82fc | 1374 | NS = Builder->CreateLShr(AndCst, Shift->getOperand(1)); |
223e47cc LB |
1375 | } |
1376 | ||
1377 | // Compute X & (C << Y). | |
1378 | Value *NewAnd = | |
1379 | Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); | |
1380 | ||
1381 | ICI.setOperand(0, NewAnd); | |
1382 | return &ICI; | |
1383 | } | |
970d7e83 | 1384 | |
1a4d82fc JJ |
1385 | // (icmp pred (and (or (lshr X, Y), X), 1), 0) --> |
1386 | // (icmp pred (and X, (or (shl 1, Y), 1), 0)) | |
1387 | // | |
1388 | // iff pred isn't signed | |
1389 | { | |
1390 | Value *X, *Y, *LShr; | |
1391 | if (!ICI.isSigned() && RHSV == 0) { | |
1392 | if (match(LHSI->getOperand(1), m_One())) { | |
1393 | Constant *One = cast<Constant>(LHSI->getOperand(1)); | |
1394 | Value *Or = LHSI->getOperand(0); | |
1395 | if (match(Or, m_Or(m_Value(LShr), m_Value(X))) && | |
1396 | match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) { | |
1397 | unsigned UsesRemoved = 0; | |
1398 | if (LHSI->hasOneUse()) | |
1399 | ++UsesRemoved; | |
1400 | if (Or->hasOneUse()) | |
1401 | ++UsesRemoved; | |
1402 | if (LShr->hasOneUse()) | |
1403 | ++UsesRemoved; | |
1404 | Value *NewOr = nullptr; | |
1405 | // Compute X & ((1 << Y) | 1) | |
1406 | if (auto *C = dyn_cast<Constant>(Y)) { | |
1407 | if (UsesRemoved >= 1) | |
1408 | NewOr = | |
1409 | ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); | |
1410 | } else { | |
1411 | if (UsesRemoved >= 3) | |
1412 | NewOr = Builder->CreateOr(Builder->CreateShl(One, Y, | |
1413 | LShr->getName(), | |
1414 | /*HasNUW=*/true), | |
1415 | One, Or->getName()); | |
1416 | } | |
1417 | if (NewOr) { | |
1418 | Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName()); | |
1419 | ICI.setOperand(0, NewAnd); | |
1420 | return &ICI; | |
1421 | } | |
1422 | } | |
1423 | } | |
1424 | } | |
1425 | } | |
1426 | ||
1427 | // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any | |
1428 | // bit set in (X & AndCst) will produce a result greater than RHSV. | |
970d7e83 | 1429 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { |
1a4d82fc JJ |
1430 | unsigned NTZ = AndCst->getValue().countTrailingZeros(); |
1431 | if ((NTZ < AndCst->getBitWidth()) && | |
1432 | APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV)) | |
970d7e83 LB |
1433 | return new ICmpInst(ICmpInst::ICMP_NE, LHSI, |
1434 | Constant::getNullValue(RHS->getType())); | |
1435 | } | |
223e47cc LB |
1436 | } |
1437 | ||
1438 | // Try to optimize things like "A[i]&42 == 0" to index computations. | |
1439 | if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { | |
1440 | if (GetElementPtrInst *GEP = | |
1441 | dyn_cast<GetElementPtrInst>(LI->getOperand(0))) | |
1442 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | |
1443 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | |
1444 | !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) { | |
1445 | ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1)); | |
1446 | if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C)) | |
1447 | return Res; | |
1448 | } | |
1449 | } | |
1a4d82fc JJ |
1450 | |
1451 | // X & -C == -C -> X > u ~C | |
1452 | // X & -C != -C -> X <= u ~C | |
1453 | // iff C is a power of 2 | |
1454 | if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2()) | |
1455 | return new ICmpInst( | |
1456 | ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT | |
1457 | : ICmpInst::ICMP_ULE, | |
1458 | LHSI->getOperand(0), SubOne(RHS)); | |
223e47cc LB |
1459 | break; |
1460 | ||
1461 | case Instruction::Or: { | |
1462 | if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) | |
1463 | break; | |
1464 | Value *P, *Q; | |
1465 | if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { | |
1466 | // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 | |
1467 | // -> and (icmp eq P, null), (icmp eq Q, null). | |
1468 | Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, | |
1469 | Constant::getNullValue(P->getType())); | |
1470 | Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, | |
1471 | Constant::getNullValue(Q->getType())); | |
1472 | Instruction *Op; | |
1473 | if (ICI.getPredicate() == ICmpInst::ICMP_EQ) | |
1474 | Op = BinaryOperator::CreateAnd(ICIP, ICIQ); | |
1475 | else | |
1476 | Op = BinaryOperator::CreateOr(ICIP, ICIQ); | |
1477 | return Op; | |
1478 | } | |
1479 | break; | |
1480 | } | |
1481 | ||
1a4d82fc JJ |
1482 | case Instruction::Mul: { // (icmp pred (mul X, Val), CI) |
1483 | ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); | |
1484 | if (!Val) break; | |
1485 | ||
1486 | // If this is a signed comparison to 0 and the mul is sign preserving, | |
1487 | // use the mul LHS operand instead. | |
1488 | ICmpInst::Predicate pred = ICI.getPredicate(); | |
1489 | if (isSignTest(pred, RHS) && !Val->isZero() && | |
1490 | cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) | |
1491 | return new ICmpInst(Val->isNegative() ? | |
1492 | ICmpInst::getSwappedPredicate(pred) : pred, | |
1493 | LHSI->getOperand(0), | |
1494 | Constant::getNullValue(RHS->getType())); | |
1495 | ||
1496 | break; | |
1497 | } | |
1498 | ||
223e47cc | 1499 | case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) |
1a4d82fc | 1500 | uint32_t TypeBits = RHSV.getBitWidth(); |
223e47cc | 1501 | ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); |
1a4d82fc JJ |
1502 | if (!ShAmt) { |
1503 | Value *X; | |
1504 | // (1 << X) pred P2 -> X pred Log2(P2) | |
1505 | if (match(LHSI, m_Shl(m_One(), m_Value(X)))) { | |
1506 | bool RHSVIsPowerOf2 = RHSV.isPowerOf2(); | |
1507 | ICmpInst::Predicate Pred = ICI.getPredicate(); | |
1508 | if (ICI.isUnsigned()) { | |
1509 | if (!RHSVIsPowerOf2) { | |
1510 | // (1 << X) < 30 -> X <= 4 | |
1511 | // (1 << X) <= 30 -> X <= 4 | |
1512 | // (1 << X) >= 30 -> X > 4 | |
1513 | // (1 << X) > 30 -> X > 4 | |
1514 | if (Pred == ICmpInst::ICMP_ULT) | |
1515 | Pred = ICmpInst::ICMP_ULE; | |
1516 | else if (Pred == ICmpInst::ICMP_UGE) | |
1517 | Pred = ICmpInst::ICMP_UGT; | |
1518 | } | |
1519 | unsigned RHSLog2 = RHSV.logBase2(); | |
1520 | ||
1521 | // (1 << X) >= 2147483648 -> X >= 31 -> X == 31 | |
1522 | // (1 << X) < 2147483648 -> X < 31 -> X != 31 | |
1523 | if (RHSLog2 == TypeBits-1) { | |
1524 | if (Pred == ICmpInst::ICMP_UGE) | |
1525 | Pred = ICmpInst::ICMP_EQ; | |
1526 | else if (Pred == ICmpInst::ICMP_ULT) | |
1527 | Pred = ICmpInst::ICMP_NE; | |
1528 | } | |
223e47cc | 1529 | |
1a4d82fc JJ |
1530 | return new ICmpInst(Pred, X, |
1531 | ConstantInt::get(RHS->getType(), RHSLog2)); | |
1532 | } else if (ICI.isSigned()) { | |
1533 | if (RHSV.isAllOnesValue()) { | |
1534 | // (1 << X) <= -1 -> X == 31 | |
1535 | if (Pred == ICmpInst::ICMP_SLE) | |
1536 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
1537 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1538 | ||
1539 | // (1 << X) > -1 -> X != 31 | |
1540 | if (Pred == ICmpInst::ICMP_SGT) | |
1541 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
1542 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1543 | } else if (!RHSV) { | |
1544 | // (1 << X) < 0 -> X == 31 | |
1545 | // (1 << X) <= 0 -> X == 31 | |
1546 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) | |
1547 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
1548 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1549 | ||
1550 | // (1 << X) >= 0 -> X != 31 | |
1551 | // (1 << X) > 0 -> X != 31 | |
1552 | if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) | |
1553 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
1554 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1555 | } | |
1556 | } else if (ICI.isEquality()) { | |
1557 | if (RHSVIsPowerOf2) | |
1558 | return new ICmpInst( | |
1559 | Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2())); | |
1560 | } | |
1561 | } | |
1562 | break; | |
1563 | } | |
223e47cc LB |
1564 | |
1565 | // Check that the shift amount is in range. If not, don't perform | |
1566 | // undefined shifts. When the shift is visited it will be | |
1567 | // simplified. | |
1568 | if (ShAmt->uge(TypeBits)) | |
1569 | break; | |
1570 | ||
1571 | if (ICI.isEquality()) { | |
1572 | // If we are comparing against bits always shifted out, the | |
1573 | // comparison cannot succeed. | |
1574 | Constant *Comp = | |
1575 | ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), | |
1576 | ShAmt); | |
1577 | if (Comp != RHS) {// Comparing against a bit that we know is zero. | |
1578 | bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; | |
1a4d82fc | 1579 | Constant *Cst = Builder->getInt1(IsICMP_NE); |
223e47cc LB |
1580 | return ReplaceInstUsesWith(ICI, Cst); |
1581 | } | |
1582 | ||
1583 | // If the shift is NUW, then it is just shifting out zeros, no need for an | |
1584 | // AND. | |
1585 | if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) | |
1586 | return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), | |
1587 | ConstantExpr::getLShr(RHS, ShAmt)); | |
1588 | ||
1a4d82fc JJ |
1589 | // If the shift is NSW and we compare to 0, then it is just shifting out |
1590 | // sign bits, no need for an AND either. | |
1591 | if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) | |
1592 | return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), | |
1593 | ConstantExpr::getLShr(RHS, ShAmt)); | |
1594 | ||
223e47cc LB |
1595 | if (LHSI->hasOneUse()) { |
1596 | // Otherwise strength reduce the shift into an and. | |
1597 | uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); | |
1a4d82fc JJ |
1598 | Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits, |
1599 | TypeBits - ShAmtVal)); | |
223e47cc LB |
1600 | |
1601 | Value *And = | |
1602 | Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); | |
1603 | return new ICmpInst(ICI.getPredicate(), And, | |
1604 | ConstantExpr::getLShr(RHS, ShAmt)); | |
1605 | } | |
1606 | } | |
1607 | ||
1a4d82fc JJ |
1608 | // If this is a signed comparison to 0 and the shift is sign preserving, |
1609 | // use the shift LHS operand instead. | |
1610 | ICmpInst::Predicate pred = ICI.getPredicate(); | |
1611 | if (isSignTest(pred, RHS) && | |
1612 | cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) | |
1613 | return new ICmpInst(pred, | |
1614 | LHSI->getOperand(0), | |
1615 | Constant::getNullValue(RHS->getType())); | |
1616 | ||
223e47cc LB |
1617 | // Otherwise, if this is a comparison of the sign bit, simplify to and/test. |
1618 | bool TrueIfSigned = false; | |
1619 | if (LHSI->hasOneUse() && | |
1620 | isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { | |
1621 | // (X << 31) <s 0 --> (X&1) != 0 | |
1622 | Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), | |
1623 | APInt::getOneBitSet(TypeBits, | |
1624 | TypeBits-ShAmt->getZExtValue()-1)); | |
1625 | Value *And = | |
1626 | Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); | |
1627 | return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, | |
1628 | And, Constant::getNullValue(And->getType())); | |
1629 | } | |
970d7e83 LB |
1630 | |
1631 | // Transform (icmp pred iM (shl iM %v, N), CI) | |
1632 | // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) | |
1633 | // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. | |
1634 | // This enables to get rid of the shift in favor of a trunc which can be | |
1635 | // free on the target. It has the additional benefit of comparing to a | |
1636 | // smaller constant, which will be target friendly. | |
1637 | unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); | |
1638 | if (LHSI->hasOneUse() && | |
1639 | Amt != 0 && RHSV.countTrailingZeros() >= Amt) { | |
1640 | Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); | |
1641 | Constant *NCI = ConstantExpr::getTrunc( | |
1642 | ConstantExpr::getAShr(RHS, | |
1643 | ConstantInt::get(RHS->getType(), Amt)), | |
1644 | NTy); | |
1645 | return new ICmpInst(ICI.getPredicate(), | |
1646 | Builder->CreateTrunc(LHSI->getOperand(0), NTy), | |
1647 | NCI); | |
1648 | } | |
1649 | ||
223e47cc LB |
1650 | break; |
1651 | } | |
1652 | ||
1653 | case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) | |
1654 | case Instruction::AShr: { | |
1655 | // Handle equality comparisons of shift-by-constant. | |
1656 | BinaryOperator *BO = cast<BinaryOperator>(LHSI); | |
1657 | if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { | |
1658 | if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) | |
1659 | return Res; | |
1660 | } | |
1661 | ||
1662 | // Handle exact shr's. | |
1663 | if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { | |
1664 | if (RHSV.isMinValue()) | |
1665 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); | |
1666 | } | |
1667 | break; | |
1668 | } | |
1669 | ||
1670 | case Instruction::SDiv: | |
1671 | case Instruction::UDiv: | |
1672 | // Fold: icmp pred ([us]div X, C1), C2 -> range test | |
1673 | // Fold this div into the comparison, producing a range check. | |
1674 | // Determine, based on the divide type, what the range is being | |
1675 | // checked. If there is an overflow on the low or high side, remember | |
1676 | // it, otherwise compute the range [low, hi) bounding the new value. | |
1677 | // See: InsertRangeTest above for the kinds of replacements possible. | |
1678 | if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) | |
1679 | if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI), | |
1680 | DivRHS)) | |
1681 | return R; | |
1682 | break; | |
1683 | ||
1a4d82fc JJ |
1684 | case Instruction::Sub: { |
1685 | ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0)); | |
1686 | if (!LHSC) break; | |
1687 | const APInt &LHSV = LHSC->getValue(); | |
1688 | ||
1689 | // C1-X <u C2 -> (X|(C2-1)) == C1 | |
1690 | // iff C1 & (C2-1) == C2-1 | |
1691 | // C2 is a power of 2 | |
1692 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && | |
1693 | RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1)) | |
1694 | return new ICmpInst(ICmpInst::ICMP_EQ, | |
1695 | Builder->CreateOr(LHSI->getOperand(1), RHSV - 1), | |
1696 | LHSC); | |
1697 | ||
1698 | // C1-X >u C2 -> (X|C2) != C1 | |
1699 | // iff C1 & C2 == C2 | |
1700 | // C2+1 is a power of 2 | |
1701 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && | |
1702 | (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV) | |
1703 | return new ICmpInst(ICmpInst::ICMP_NE, | |
1704 | Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC); | |
1705 | break; | |
1706 | } | |
1707 | ||
223e47cc LB |
1708 | case Instruction::Add: |
1709 | // Fold: icmp pred (add X, C1), C2 | |
1710 | if (!ICI.isEquality()) { | |
1711 | ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); | |
1712 | if (!LHSC) break; | |
1713 | const APInt &LHSV = LHSC->getValue(); | |
1714 | ||
1715 | ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) | |
1716 | .subtract(LHSV); | |
1717 | ||
1718 | if (ICI.isSigned()) { | |
1719 | if (CR.getLower().isSignBit()) { | |
1720 | return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), | |
1a4d82fc | 1721 | Builder->getInt(CR.getUpper())); |
223e47cc LB |
1722 | } else if (CR.getUpper().isSignBit()) { |
1723 | return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), | |
1a4d82fc | 1724 | Builder->getInt(CR.getLower())); |
223e47cc LB |
1725 | } |
1726 | } else { | |
1727 | if (CR.getLower().isMinValue()) { | |
1728 | return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), | |
1a4d82fc | 1729 | Builder->getInt(CR.getUpper())); |
223e47cc LB |
1730 | } else if (CR.getUpper().isMinValue()) { |
1731 | return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), | |
1a4d82fc | 1732 | Builder->getInt(CR.getLower())); |
223e47cc LB |
1733 | } |
1734 | } | |
1a4d82fc JJ |
1735 | |
1736 | // X-C1 <u C2 -> (X & -C2) == C1 | |
1737 | // iff C1 & (C2-1) == 0 | |
1738 | // C2 is a power of 2 | |
1739 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && | |
1740 | RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0) | |
1741 | return new ICmpInst(ICmpInst::ICMP_EQ, | |
1742 | Builder->CreateAnd(LHSI->getOperand(0), -RHSV), | |
1743 | ConstantExpr::getNeg(LHSC)); | |
1744 | ||
1745 | // X-C1 >u C2 -> (X & ~C2) != C1 | |
1746 | // iff C1 & C2 == 0 | |
1747 | // C2+1 is a power of 2 | |
1748 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && | |
1749 | (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0) | |
1750 | return new ICmpInst(ICmpInst::ICMP_NE, | |
1751 | Builder->CreateAnd(LHSI->getOperand(0), ~RHSV), | |
1752 | ConstantExpr::getNeg(LHSC)); | |
223e47cc LB |
1753 | } |
1754 | break; | |
1755 | } | |
1756 | ||
1757 | // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. | |
1758 | if (ICI.isEquality()) { | |
1759 | bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; | |
1760 | ||
1761 | // If the first operand is (add|sub|and|or|xor|rem) with a constant, and | |
1762 | // the second operand is a constant, simplify a bit. | |
1763 | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { | |
1764 | switch (BO->getOpcode()) { | |
1765 | case Instruction::SRem: | |
1766 | // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. | |
1767 | if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ | |
1768 | const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); | |
1769 | if (V.sgt(1) && V.isPowerOf2()) { | |
1770 | Value *NewRem = | |
1771 | Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), | |
1772 | BO->getName()); | |
1773 | return new ICmpInst(ICI.getPredicate(), NewRem, | |
1774 | Constant::getNullValue(BO->getType())); | |
1775 | } | |
1776 | } | |
1777 | break; | |
1778 | case Instruction::Add: | |
1779 | // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. | |
1780 | if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1781 | if (BO->hasOneUse()) | |
1782 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1783 | ConstantExpr::getSub(RHS, BOp1C)); | |
1784 | } else if (RHSV == 0) { | |
1785 | // Replace ((add A, B) != 0) with (A != -B) if A or B is | |
1786 | // efficiently invertible, or if the add has just this one use. | |
1787 | Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); | |
1788 | ||
1789 | if (Value *NegVal = dyn_castNegVal(BOp1)) | |
1790 | return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); | |
1791 | if (Value *NegVal = dyn_castNegVal(BOp0)) | |
1792 | return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); | |
1793 | if (BO->hasOneUse()) { | |
1794 | Value *Neg = Builder->CreateNeg(BOp1); | |
1795 | Neg->takeName(BO); | |
1796 | return new ICmpInst(ICI.getPredicate(), BOp0, Neg); | |
1797 | } | |
1798 | } | |
1799 | break; | |
1800 | case Instruction::Xor: | |
1801 | // For the xor case, we can xor two constants together, eliminating | |
1802 | // the explicit xor. | |
1803 | if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { | |
1804 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1805 | ConstantExpr::getXor(RHS, BOC)); | |
1806 | } else if (RHSV == 0) { | |
1807 | // Replace ((xor A, B) != 0) with (A != B) | |
1808 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1809 | BO->getOperand(1)); | |
1810 | } | |
1811 | break; | |
1812 | case Instruction::Sub: | |
1813 | // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. | |
1814 | if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { | |
1815 | if (BO->hasOneUse()) | |
1816 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), | |
1817 | ConstantExpr::getSub(BOp0C, RHS)); | |
1818 | } else if (RHSV == 0) { | |
1819 | // Replace ((sub A, B) != 0) with (A != B) | |
1820 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1821 | BO->getOperand(1)); | |
1822 | } | |
1823 | break; | |
1824 | case Instruction::Or: | |
1825 | // If bits are being or'd in that are not present in the constant we | |
1826 | // are comparing against, then the comparison could never succeed! | |
1827 | if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1828 | Constant *NotCI = ConstantExpr::getNot(RHS); | |
1829 | if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) | |
1a4d82fc | 1830 | return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); |
223e47cc LB |
1831 | } |
1832 | break; | |
1833 | ||
1834 | case Instruction::And: | |
1835 | if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1836 | // If bits are being compared against that are and'd out, then the | |
1837 | // comparison can never succeed! | |
1838 | if ((RHSV & ~BOC->getValue()) != 0) | |
1a4d82fc | 1839 | return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); |
223e47cc LB |
1840 | |
1841 | // If we have ((X & C) == C), turn it into ((X & C) != 0). | |
1842 | if (RHS == BOC && RHSV.isPowerOf2()) | |
1843 | return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : | |
1844 | ICmpInst::ICMP_NE, LHSI, | |
1845 | Constant::getNullValue(RHS->getType())); | |
1846 | ||
1847 | // Don't perform the following transforms if the AND has multiple uses | |
1848 | if (!BO->hasOneUse()) | |
1849 | break; | |
1850 | ||
1851 | // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 | |
1852 | if (BOC->getValue().isSignBit()) { | |
1853 | Value *X = BO->getOperand(0); | |
1854 | Constant *Zero = Constant::getNullValue(X->getType()); | |
1855 | ICmpInst::Predicate pred = isICMP_NE ? | |
1856 | ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; | |
1857 | return new ICmpInst(pred, X, Zero); | |
1858 | } | |
1859 | ||
1860 | // ((X & ~7) == 0) --> X < 8 | |
1861 | if (RHSV == 0 && isHighOnes(BOC)) { | |
1862 | Value *X = BO->getOperand(0); | |
1863 | Constant *NegX = ConstantExpr::getNeg(BOC); | |
1864 | ICmpInst::Predicate pred = isICMP_NE ? | |
1865 | ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; | |
1866 | return new ICmpInst(pred, X, NegX); | |
1867 | } | |
1868 | } | |
1a4d82fc JJ |
1869 | break; |
1870 | case Instruction::Mul: | |
1871 | if (RHSV == 0 && BO->hasNoSignedWrap()) { | |
1872 | if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1873 | // The trivial case (mul X, 0) is handled by InstSimplify | |
1874 | // General case : (mul X, C) != 0 iff X != 0 | |
1875 | // (mul X, C) == 0 iff X == 0 | |
1876 | if (!BOC->isZero()) | |
1877 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1878 | Constant::getNullValue(RHS->getType())); | |
1879 | } | |
1880 | } | |
1881 | break; | |
223e47cc LB |
1882 | default: break; |
1883 | } | |
1884 | } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { | |
1885 | // Handle icmp {eq|ne} <intrinsic>, intcst. | |
1886 | switch (II->getIntrinsicID()) { | |
1887 | case Intrinsic::bswap: | |
1888 | Worklist.Add(II); | |
1889 | ICI.setOperand(0, II->getArgOperand(0)); | |
1a4d82fc | 1890 | ICI.setOperand(1, Builder->getInt(RHSV.byteSwap())); |
223e47cc LB |
1891 | return &ICI; |
1892 | case Intrinsic::ctlz: | |
1893 | case Intrinsic::cttz: | |
1894 | // ctz(A) == bitwidth(a) -> A == 0 and likewise for != | |
1895 | if (RHSV == RHS->getType()->getBitWidth()) { | |
1896 | Worklist.Add(II); | |
1897 | ICI.setOperand(0, II->getArgOperand(0)); | |
1898 | ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); | |
1899 | return &ICI; | |
1900 | } | |
1901 | break; | |
1902 | case Intrinsic::ctpop: | |
1903 | // popcount(A) == 0 -> A == 0 and likewise for != | |
1904 | if (RHS->isZero()) { | |
1905 | Worklist.Add(II); | |
1906 | ICI.setOperand(0, II->getArgOperand(0)); | |
1907 | ICI.setOperand(1, RHS); | |
1908 | return &ICI; | |
1909 | } | |
1910 | break; | |
1911 | default: | |
1912 | break; | |
1913 | } | |
1914 | } | |
1915 | } | |
1a4d82fc | 1916 | return nullptr; |
223e47cc LB |
1917 | } |
1918 | ||
1919 | /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). | |
1920 | /// We only handle extending casts so far. | |
1921 | /// | |
1922 | Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { | |
1923 | const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); | |
1924 | Value *LHSCIOp = LHSCI->getOperand(0); | |
1925 | Type *SrcTy = LHSCIOp->getType(); | |
1926 | Type *DestTy = LHSCI->getType(); | |
1927 | Value *RHSCIOp; | |
1928 | ||
1929 | // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the | |
1930 | // integer type is the same size as the pointer type. | |
1a4d82fc JJ |
1931 | if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && |
1932 | DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { | |
1933 | Value *RHSOp = nullptr; | |
223e47cc LB |
1934 | if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { |
1935 | RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); | |
1936 | } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { | |
1937 | RHSOp = RHSC->getOperand(0); | |
1938 | // If the pointer types don't match, insert a bitcast. | |
1939 | if (LHSCIOp->getType() != RHSOp->getType()) | |
1940 | RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); | |
1941 | } | |
1942 | ||
1943 | if (RHSOp) | |
1944 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); | |
1945 | } | |
1946 | ||
1947 | // The code below only handles extension cast instructions, so far. | |
1948 | // Enforce this. | |
1949 | if (LHSCI->getOpcode() != Instruction::ZExt && | |
1950 | LHSCI->getOpcode() != Instruction::SExt) | |
1a4d82fc | 1951 | return nullptr; |
223e47cc LB |
1952 | |
1953 | bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; | |
1954 | bool isSignedCmp = ICI.isSigned(); | |
1955 | ||
1956 | if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { | |
1957 | // Not an extension from the same type? | |
1958 | RHSCIOp = CI->getOperand(0); | |
1959 | if (RHSCIOp->getType() != LHSCIOp->getType()) | |
1a4d82fc | 1960 | return nullptr; |
223e47cc LB |
1961 | |
1962 | // If the signedness of the two casts doesn't agree (i.e. one is a sext | |
1963 | // and the other is a zext), then we can't handle this. | |
1964 | if (CI->getOpcode() != LHSCI->getOpcode()) | |
1a4d82fc | 1965 | return nullptr; |
223e47cc LB |
1966 | |
1967 | // Deal with equality cases early. | |
1968 | if (ICI.isEquality()) | |
1969 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); | |
1970 | ||
1971 | // A signed comparison of sign extended values simplifies into a | |
1972 | // signed comparison. | |
1973 | if (isSignedCmp && isSignedExt) | |
1974 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); | |
1975 | ||
1976 | // The other three cases all fold into an unsigned comparison. | |
1977 | return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); | |
1978 | } | |
1979 | ||
1980 | // If we aren't dealing with a constant on the RHS, exit early | |
1981 | ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); | |
1982 | if (!CI) | |
1a4d82fc | 1983 | return nullptr; |
223e47cc LB |
1984 | |
1985 | // Compute the constant that would happen if we truncated to SrcTy then | |
1986 | // reextended to DestTy. | |
1987 | Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); | |
1988 | Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), | |
1989 | Res1, DestTy); | |
1990 | ||
1991 | // If the re-extended constant didn't change... | |
1992 | if (Res2 == CI) { | |
1993 | // Deal with equality cases early. | |
1994 | if (ICI.isEquality()) | |
1995 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); | |
1996 | ||
1997 | // A signed comparison of sign extended values simplifies into a | |
1998 | // signed comparison. | |
1999 | if (isSignedExt && isSignedCmp) | |
2000 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); | |
2001 | ||
2002 | // The other three cases all fold into an unsigned comparison. | |
2003 | return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); | |
2004 | } | |
2005 | ||
2006 | // The re-extended constant changed so the constant cannot be represented | |
2007 | // in the shorter type. Consequently, we cannot emit a simple comparison. | |
2008 | // All the cases that fold to true or false will have already been handled | |
2009 | // by SimplifyICmpInst, so only deal with the tricky case. | |
2010 | ||
2011 | if (isSignedCmp || !isSignedExt) | |
1a4d82fc | 2012 | return nullptr; |
223e47cc LB |
2013 | |
2014 | // Evaluate the comparison for LT (we invert for GT below). LE and GE cases | |
2015 | // should have been folded away previously and not enter in here. | |
2016 | ||
2017 | // We're performing an unsigned comp with a sign extended value. | |
2018 | // This is true if the input is >= 0. [aka >s -1] | |
2019 | Constant *NegOne = Constant::getAllOnesValue(SrcTy); | |
2020 | Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); | |
2021 | ||
2022 | // Finally, return the value computed. | |
2023 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT) | |
2024 | return ReplaceInstUsesWith(ICI, Result); | |
2025 | ||
2026 | assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); | |
2027 | return BinaryOperator::CreateNot(Result); | |
2028 | } | |
2029 | ||
2030 | /// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: | |
2031 | /// I = icmp ugt (add (add A, B), CI2), CI1 | |
2032 | /// If this is of the form: | |
2033 | /// sum = a + b | |
2034 | /// if (sum+128 >u 255) | |
2035 | /// Then replace it with llvm.sadd.with.overflow.i8. | |
2036 | /// | |
2037 | static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, | |
2038 | ConstantInt *CI2, ConstantInt *CI1, | |
2039 | InstCombiner &IC) { | |
2040 | // The transformation we're trying to do here is to transform this into an | |
2041 | // llvm.sadd.with.overflow. To do this, we have to replace the original add | |
2042 | // with a narrower add, and discard the add-with-constant that is part of the | |
2043 | // range check (if we can't eliminate it, this isn't profitable). | |
2044 | ||
2045 | // In order to eliminate the add-with-constant, the compare can be its only | |
2046 | // use. | |
2047 | Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); | |
1a4d82fc | 2048 | if (!AddWithCst->hasOneUse()) return nullptr; |
223e47cc LB |
2049 | |
2050 | // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. | |
1a4d82fc | 2051 | if (!CI2->getValue().isPowerOf2()) return nullptr; |
223e47cc | 2052 | unsigned NewWidth = CI2->getValue().countTrailingZeros(); |
1a4d82fc | 2053 | if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr; |
223e47cc LB |
2054 | |
2055 | // The width of the new add formed is 1 more than the bias. | |
2056 | ++NewWidth; | |
2057 | ||
2058 | // Check to see that CI1 is an all-ones value with NewWidth bits. | |
2059 | if (CI1->getBitWidth() == NewWidth || | |
2060 | CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) | |
1a4d82fc | 2061 | return nullptr; |
223e47cc LB |
2062 | |
2063 | // This is only really a signed overflow check if the inputs have been | |
2064 | // sign-extended; check for that condition. For example, if CI2 is 2^31 and | |
2065 | // the operands of the add are 64 bits wide, we need at least 33 sign bits. | |
2066 | unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; | |
1a4d82fc JJ |
2067 | if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits || |
2068 | IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits) | |
2069 | return nullptr; | |
223e47cc LB |
2070 | |
2071 | // In order to replace the original add with a narrower | |
2072 | // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant | |
2073 | // and truncates that discard the high bits of the add. Verify that this is | |
2074 | // the case. | |
2075 | Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); | |
1a4d82fc JJ |
2076 | for (User *U : OrigAdd->users()) { |
2077 | if (U == AddWithCst) continue; | |
223e47cc LB |
2078 | |
2079 | // Only accept truncates for now. We would really like a nice recursive | |
2080 | // predicate like SimplifyDemandedBits, but which goes downwards the use-def | |
2081 | // chain to see which bits of a value are actually demanded. If the | |
2082 | // original add had another add which was then immediately truncated, we | |
2083 | // could still do the transformation. | |
1a4d82fc JJ |
2084 | TruncInst *TI = dyn_cast<TruncInst>(U); |
2085 | if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) | |
2086 | return nullptr; | |
223e47cc LB |
2087 | } |
2088 | ||
2089 | // If the pattern matches, truncate the inputs to the narrower type and | |
2090 | // use the sadd_with_overflow intrinsic to efficiently compute both the | |
2091 | // result and the overflow bit. | |
2092 | Module *M = I.getParent()->getParent()->getParent(); | |
2093 | ||
2094 | Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); | |
2095 | Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, | |
2096 | NewType); | |
2097 | ||
2098 | InstCombiner::BuilderTy *Builder = IC.Builder; | |
2099 | ||
2100 | // Put the new code above the original add, in case there are any uses of the | |
2101 | // add between the add and the compare. | |
2102 | Builder->SetInsertPoint(OrigAdd); | |
2103 | ||
2104 | Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); | |
2105 | Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); | |
2106 | CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); | |
2107 | Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); | |
2108 | Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); | |
2109 | ||
2110 | // The inner add was the result of the narrow add, zero extended to the | |
2111 | // wider type. Replace it with the result computed by the intrinsic. | |
2112 | IC.ReplaceInstUsesWith(*OrigAdd, ZExt); | |
2113 | ||
2114 | // The original icmp gets replaced with the overflow value. | |
2115 | return ExtractValueInst::Create(Call, 1, "sadd.overflow"); | |
2116 | } | |
2117 | ||
2118 | static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, | |
2119 | InstCombiner &IC) { | |
2120 | // Don't bother doing this transformation for pointers, don't do it for | |
2121 | // vectors. | |
1a4d82fc | 2122 | if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; |
223e47cc LB |
2123 | |
2124 | // If the add is a constant expr, then we don't bother transforming it. | |
2125 | Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); | |
1a4d82fc | 2126 | if (!OrigAdd) return nullptr; |
223e47cc LB |
2127 | |
2128 | Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); | |
2129 | ||
2130 | // Put the new code above the original add, in case there are any uses of the | |
2131 | // add between the add and the compare. | |
2132 | InstCombiner::BuilderTy *Builder = IC.Builder; | |
2133 | Builder->SetInsertPoint(OrigAdd); | |
2134 | ||
2135 | Module *M = I.getParent()->getParent()->getParent(); | |
2136 | Type *Ty = LHS->getType(); | |
2137 | Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); | |
2138 | CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); | |
2139 | Value *Add = Builder->CreateExtractValue(Call, 0); | |
2140 | ||
2141 | IC.ReplaceInstUsesWith(*OrigAdd, Add); | |
2142 | ||
2143 | // The original icmp gets replaced with the overflow value. | |
2144 | return ExtractValueInst::Create(Call, 1, "uadd.overflow"); | |
2145 | } | |
2146 | ||
1a4d82fc JJ |
2147 | /// \brief Recognize and process idiom involving test for multiplication |
2148 | /// overflow. | |
2149 | /// | |
2150 | /// The caller has matched a pattern of the form: | |
2151 | /// I = cmp u (mul(zext A, zext B), V | |
2152 | /// The function checks if this is a test for overflow and if so replaces | |
2153 | /// multiplication with call to 'mul.with.overflow' intrinsic. | |
2154 | /// | |
2155 | /// \param I Compare instruction. | |
2156 | /// \param MulVal Result of 'mult' instruction. It is one of the arguments of | |
2157 | /// the compare instruction. Must be of integer type. | |
2158 | /// \param OtherVal The other argument of compare instruction. | |
2159 | /// \returns Instruction which must replace the compare instruction, NULL if no | |
2160 | /// replacement required. | |
2161 | static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, | |
2162 | Value *OtherVal, InstCombiner &IC) { | |
2163 | // Don't bother doing this transformation for pointers, don't do it for | |
2164 | // vectors. | |
2165 | if (!isa<IntegerType>(MulVal->getType())) | |
2166 | return nullptr; | |
2167 | ||
2168 | assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); | |
2169 | assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); | |
2170 | Instruction *MulInstr = cast<Instruction>(MulVal); | |
2171 | assert(MulInstr->getOpcode() == Instruction::Mul); | |
2172 | ||
85aaf69f SL |
2173 | auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)), |
2174 | *RHS = cast<ZExtOperator>(MulInstr->getOperand(1)); | |
1a4d82fc JJ |
2175 | assert(LHS->getOpcode() == Instruction::ZExt); |
2176 | assert(RHS->getOpcode() == Instruction::ZExt); | |
2177 | Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); | |
2178 | ||
2179 | // Calculate type and width of the result produced by mul.with.overflow. | |
2180 | Type *TyA = A->getType(), *TyB = B->getType(); | |
2181 | unsigned WidthA = TyA->getPrimitiveSizeInBits(), | |
2182 | WidthB = TyB->getPrimitiveSizeInBits(); | |
2183 | unsigned MulWidth; | |
2184 | Type *MulType; | |
2185 | if (WidthB > WidthA) { | |
2186 | MulWidth = WidthB; | |
2187 | MulType = TyB; | |
2188 | } else { | |
2189 | MulWidth = WidthA; | |
2190 | MulType = TyA; | |
2191 | } | |
2192 | ||
2193 | // In order to replace the original mul with a narrower mul.with.overflow, | |
2194 | // all uses must ignore upper bits of the product. The number of used low | |
2195 | // bits must be not greater than the width of mul.with.overflow. | |
2196 | if (MulVal->hasNUsesOrMore(2)) | |
2197 | for (User *U : MulVal->users()) { | |
2198 | if (U == &I) | |
2199 | continue; | |
2200 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { | |
2201 | // Check if truncation ignores bits above MulWidth. | |
2202 | unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); | |
2203 | if (TruncWidth > MulWidth) | |
2204 | return nullptr; | |
2205 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { | |
2206 | // Check if AND ignores bits above MulWidth. | |
2207 | if (BO->getOpcode() != Instruction::And) | |
2208 | return nullptr; | |
2209 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
2210 | const APInt &CVal = CI->getValue(); | |
2211 | if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) | |
2212 | return nullptr; | |
2213 | } | |
2214 | } else { | |
2215 | // Other uses prohibit this transformation. | |
2216 | return nullptr; | |
2217 | } | |
2218 | } | |
2219 | ||
2220 | // Recognize patterns | |
2221 | switch (I.getPredicate()) { | |
2222 | case ICmpInst::ICMP_EQ: | |
2223 | case ICmpInst::ICMP_NE: | |
2224 | // Recognize pattern: | |
2225 | // mulval = mul(zext A, zext B) | |
2226 | // cmp eq/neq mulval, zext trunc mulval | |
2227 | if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal)) | |
2228 | if (Zext->hasOneUse()) { | |
2229 | Value *ZextArg = Zext->getOperand(0); | |
2230 | if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg)) | |
2231 | if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) | |
2232 | break; //Recognized | |
2233 | } | |
2234 | ||
2235 | // Recognize pattern: | |
2236 | // mulval = mul(zext A, zext B) | |
2237 | // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. | |
2238 | ConstantInt *CI; | |
2239 | Value *ValToMask; | |
2240 | if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { | |
2241 | if (ValToMask != MulVal) | |
2242 | return nullptr; | |
2243 | const APInt &CVal = CI->getValue() + 1; | |
2244 | if (CVal.isPowerOf2()) { | |
2245 | unsigned MaskWidth = CVal.logBase2(); | |
2246 | if (MaskWidth == MulWidth) | |
2247 | break; // Recognized | |
2248 | } | |
2249 | } | |
2250 | return nullptr; | |
2251 | ||
2252 | case ICmpInst::ICMP_UGT: | |
2253 | // Recognize pattern: | |
2254 | // mulval = mul(zext A, zext B) | |
2255 | // cmp ugt mulval, max | |
2256 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2257 | APInt MaxVal = APInt::getMaxValue(MulWidth); | |
2258 | MaxVal = MaxVal.zext(CI->getBitWidth()); | |
2259 | if (MaxVal.eq(CI->getValue())) | |
2260 | break; // Recognized | |
2261 | } | |
2262 | return nullptr; | |
2263 | ||
2264 | case ICmpInst::ICMP_UGE: | |
2265 | // Recognize pattern: | |
2266 | // mulval = mul(zext A, zext B) | |
2267 | // cmp uge mulval, max+1 | |
2268 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2269 | APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); | |
2270 | if (MaxVal.eq(CI->getValue())) | |
2271 | break; // Recognized | |
2272 | } | |
2273 | return nullptr; | |
2274 | ||
2275 | case ICmpInst::ICMP_ULE: | |
2276 | // Recognize pattern: | |
2277 | // mulval = mul(zext A, zext B) | |
2278 | // cmp ule mulval, max | |
2279 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2280 | APInt MaxVal = APInt::getMaxValue(MulWidth); | |
2281 | MaxVal = MaxVal.zext(CI->getBitWidth()); | |
2282 | if (MaxVal.eq(CI->getValue())) | |
2283 | break; // Recognized | |
2284 | } | |
2285 | return nullptr; | |
2286 | ||
2287 | case ICmpInst::ICMP_ULT: | |
2288 | // Recognize pattern: | |
2289 | // mulval = mul(zext A, zext B) | |
2290 | // cmp ule mulval, max + 1 | |
2291 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2292 | APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); | |
2293 | if (MaxVal.eq(CI->getValue())) | |
2294 | break; // Recognized | |
2295 | } | |
2296 | return nullptr; | |
2297 | ||
2298 | default: | |
2299 | return nullptr; | |
2300 | } | |
2301 | ||
2302 | InstCombiner::BuilderTy *Builder = IC.Builder; | |
2303 | Builder->SetInsertPoint(MulInstr); | |
2304 | Module *M = I.getParent()->getParent()->getParent(); | |
2305 | ||
2306 | // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) | |
2307 | Value *MulA = A, *MulB = B; | |
2308 | if (WidthA < MulWidth) | |
2309 | MulA = Builder->CreateZExt(A, MulType); | |
2310 | if (WidthB < MulWidth) | |
2311 | MulB = Builder->CreateZExt(B, MulType); | |
2312 | Value *F = | |
2313 | Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType); | |
2314 | CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul"); | |
2315 | IC.Worklist.Add(MulInstr); | |
2316 | ||
2317 | // If there are uses of mul result other than the comparison, we know that | |
2318 | // they are truncation or binary AND. Change them to use result of | |
2319 | // mul.with.overflow and adjust properly mask/size. | |
2320 | if (MulVal->hasNUsesOrMore(2)) { | |
2321 | Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); | |
2322 | for (User *U : MulVal->users()) { | |
2323 | if (U == &I || U == OtherVal) | |
2324 | continue; | |
2325 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { | |
2326 | if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) | |
2327 | IC.ReplaceInstUsesWith(*TI, Mul); | |
2328 | else | |
2329 | TI->setOperand(0, Mul); | |
2330 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { | |
2331 | assert(BO->getOpcode() == Instruction::And); | |
2332 | // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) | |
2333 | ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); | |
2334 | APInt ShortMask = CI->getValue().trunc(MulWidth); | |
2335 | Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); | |
2336 | Instruction *Zext = | |
2337 | cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); | |
2338 | IC.Worklist.Add(Zext); | |
2339 | IC.ReplaceInstUsesWith(*BO, Zext); | |
2340 | } else { | |
2341 | llvm_unreachable("Unexpected Binary operation"); | |
2342 | } | |
2343 | IC.Worklist.Add(cast<Instruction>(U)); | |
2344 | } | |
2345 | } | |
2346 | if (isa<Instruction>(OtherVal)) | |
2347 | IC.Worklist.Add(cast<Instruction>(OtherVal)); | |
2348 | ||
2349 | // The original icmp gets replaced with the overflow value, maybe inverted | |
2350 | // depending on predicate. | |
2351 | bool Inverse = false; | |
2352 | switch (I.getPredicate()) { | |
2353 | case ICmpInst::ICMP_NE: | |
2354 | break; | |
2355 | case ICmpInst::ICMP_EQ: | |
2356 | Inverse = true; | |
2357 | break; | |
2358 | case ICmpInst::ICMP_UGT: | |
2359 | case ICmpInst::ICMP_UGE: | |
2360 | if (I.getOperand(0) == MulVal) | |
2361 | break; | |
2362 | Inverse = true; | |
2363 | break; | |
2364 | case ICmpInst::ICMP_ULT: | |
2365 | case ICmpInst::ICMP_ULE: | |
2366 | if (I.getOperand(1) == MulVal) | |
2367 | break; | |
2368 | Inverse = true; | |
2369 | break; | |
2370 | default: | |
2371 | llvm_unreachable("Unexpected predicate"); | |
2372 | } | |
2373 | if (Inverse) { | |
2374 | Value *Res = Builder->CreateExtractValue(Call, 1); | |
2375 | return BinaryOperator::CreateNot(Res); | |
2376 | } | |
2377 | ||
2378 | return ExtractValueInst::Create(Call, 1); | |
2379 | } | |
2380 | ||
223e47cc LB |
2381 | // DemandedBitsLHSMask - When performing a comparison against a constant, |
2382 | // it is possible that not all the bits in the LHS are demanded. This helper | |
2383 | // method computes the mask that IS demanded. | |
2384 | static APInt DemandedBitsLHSMask(ICmpInst &I, | |
2385 | unsigned BitWidth, bool isSignCheck) { | |
2386 | if (isSignCheck) | |
2387 | return APInt::getSignBit(BitWidth); | |
2388 | ||
2389 | ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); | |
2390 | if (!CI) return APInt::getAllOnesValue(BitWidth); | |
2391 | const APInt &RHS = CI->getValue(); | |
2392 | ||
2393 | switch (I.getPredicate()) { | |
2394 | // For a UGT comparison, we don't care about any bits that | |
2395 | // correspond to the trailing ones of the comparand. The value of these | |
2396 | // bits doesn't impact the outcome of the comparison, because any value | |
2397 | // greater than the RHS must differ in a bit higher than these due to carry. | |
2398 | case ICmpInst::ICMP_UGT: { | |
2399 | unsigned trailingOnes = RHS.countTrailingOnes(); | |
2400 | APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); | |
2401 | return ~lowBitsSet; | |
2402 | } | |
2403 | ||
2404 | // Similarly, for a ULT comparison, we don't care about the trailing zeros. | |
2405 | // Any value less than the RHS must differ in a higher bit because of carries. | |
2406 | case ICmpInst::ICMP_ULT: { | |
2407 | unsigned trailingZeros = RHS.countTrailingZeros(); | |
2408 | APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); | |
2409 | return ~lowBitsSet; | |
2410 | } | |
2411 | ||
2412 | default: | |
2413 | return APInt::getAllOnesValue(BitWidth); | |
2414 | } | |
2415 | ||
2416 | } | |
2417 | ||
1a4d82fc JJ |
2418 | /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst |
2419 | /// should be swapped. | |
2420 | /// The decision is based on how many times these two operands are reused | |
2421 | /// as subtract operands and their positions in those instructions. | |
2422 | /// The rational is that several architectures use the same instruction for | |
2423 | /// both subtract and cmp, thus it is better if the order of those operands | |
2424 | /// match. | |
2425 | /// \return true if Op0 and Op1 should be swapped. | |
2426 | static bool swapMayExposeCSEOpportunities(const Value * Op0, | |
2427 | const Value * Op1) { | |
2428 | // Filter out pointer value as those cannot appears directly in subtract. | |
2429 | // FIXME: we may want to go through inttoptrs or bitcasts. | |
2430 | if (Op0->getType()->isPointerTy()) | |
2431 | return false; | |
2432 | // Count every uses of both Op0 and Op1 in a subtract. | |
2433 | // Each time Op0 is the first operand, count -1: swapping is bad, the | |
2434 | // subtract has already the same layout as the compare. | |
2435 | // Each time Op0 is the second operand, count +1: swapping is good, the | |
2436 | // subtract has a different layout as the compare. | |
2437 | // At the end, if the benefit is greater than 0, Op0 should come second to | |
2438 | // expose more CSE opportunities. | |
2439 | int GlobalSwapBenefits = 0; | |
2440 | for (const User *U : Op0->users()) { | |
2441 | const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U); | |
2442 | if (!BinOp || BinOp->getOpcode() != Instruction::Sub) | |
2443 | continue; | |
2444 | // If Op0 is the first argument, this is not beneficial to swap the | |
2445 | // arguments. | |
2446 | int LocalSwapBenefits = -1; | |
2447 | unsigned Op1Idx = 1; | |
2448 | if (BinOp->getOperand(Op1Idx) == Op0) { | |
2449 | Op1Idx = 0; | |
2450 | LocalSwapBenefits = 1; | |
2451 | } | |
2452 | if (BinOp->getOperand(Op1Idx) != Op1) | |
2453 | continue; | |
2454 | GlobalSwapBenefits += LocalSwapBenefits; | |
2455 | } | |
2456 | return GlobalSwapBenefits > 0; | |
2457 | } | |
2458 | ||
85aaf69f SL |
2459 | /// \brief Check that one use is in the same block as the definition and all |
2460 | /// other uses are in blocks dominated by a given block | |
2461 | /// | |
2462 | /// \param DI Definition | |
2463 | /// \param UI Use | |
2464 | /// \param DB Block that must dominate all uses of \p DI outside | |
2465 | /// the parent block | |
2466 | /// \return true when \p UI is the only use of \p DI in the parent block | |
2467 | /// and all other uses of \p DI are in blocks dominated by \p DB. | |
2468 | /// | |
2469 | bool InstCombiner::dominatesAllUses(const Instruction *DI, | |
2470 | const Instruction *UI, | |
2471 | const BasicBlock *DB) const { | |
2472 | assert(DI && UI && "Instruction not defined\n"); | |
2473 | // ignore incomplete definitions | |
2474 | if (!DI->getParent()) | |
2475 | return false; | |
2476 | // DI and UI must be in the same block | |
2477 | if (DI->getParent() != UI->getParent()) | |
2478 | return false; | |
2479 | // Protect from self-referencing blocks | |
2480 | if (DI->getParent() == DB) | |
2481 | return false; | |
2482 | // DominatorTree available? | |
2483 | if (!DT) | |
2484 | return false; | |
2485 | for (const User *U : DI->users()) { | |
2486 | auto *Usr = cast<Instruction>(U); | |
2487 | if (Usr != UI && !DT->dominates(DB, Usr->getParent())) | |
2488 | return false; | |
2489 | } | |
2490 | return true; | |
2491 | } | |
2492 | ||
2493 | /// | |
2494 | /// true when the instruction sequence within a block is select-cmp-br. | |
2495 | /// | |
2496 | static bool isChainSelectCmpBranch(const SelectInst *SI) { | |
2497 | const BasicBlock *BB = SI->getParent(); | |
2498 | if (!BB) | |
2499 | return false; | |
2500 | auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator()); | |
2501 | if (!BI || BI->getNumSuccessors() != 2) | |
2502 | return false; | |
2503 | auto *IC = dyn_cast<ICmpInst>(BI->getCondition()); | |
2504 | if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI)) | |
2505 | return false; | |
2506 | return true; | |
2507 | } | |
2508 | ||
2509 | /// | |
2510 | /// \brief True when a select result is replaced by one of its operands | |
2511 | /// in select-icmp sequence. This will eventually result in the elimination | |
2512 | /// of the select. | |
2513 | /// | |
2514 | /// \param SI Select instruction | |
2515 | /// \param Icmp Compare instruction | |
2516 | /// \param SIOpd Operand that replaces the select | |
2517 | /// | |
2518 | /// Notes: | |
2519 | /// - The replacement is global and requires dominator information | |
2520 | /// - The caller is responsible for the actual replacement | |
2521 | /// | |
2522 | /// Example: | |
2523 | /// | |
2524 | /// entry: | |
2525 | /// %4 = select i1 %3, %C* %0, %C* null | |
2526 | /// %5 = icmp eq %C* %4, null | |
2527 | /// br i1 %5, label %9, label %7 | |
2528 | /// ... | |
2529 | /// ; <label>:7 ; preds = %entry | |
2530 | /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0 | |
2531 | /// ... | |
2532 | /// | |
2533 | /// can be transformed to | |
2534 | /// | |
2535 | /// %5 = icmp eq %C* %0, null | |
2536 | /// %6 = select i1 %3, i1 %5, i1 true | |
2537 | /// br i1 %6, label %9, label %7 | |
2538 | /// ... | |
2539 | /// ; <label>:7 ; preds = %entry | |
2540 | /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0! | |
2541 | /// | |
2542 | /// Similar when the first operand of the select is a constant or/and | |
2543 | /// the compare is for not equal rather than equal. | |
2544 | /// | |
2545 | /// NOTE: The function is only called when the select and compare constants | |
2546 | /// are equal, the optimization can work only for EQ predicates. This is not a | |
2547 | /// major restriction since a NE compare should be 'normalized' to an equal | |
2548 | /// compare, which usually happens in the combiner and test case | |
2549 | /// select-cmp-br.ll | |
2550 | /// checks for it. | |
2551 | bool InstCombiner::replacedSelectWithOperand(SelectInst *SI, | |
2552 | const ICmpInst *Icmp, | |
2553 | const unsigned SIOpd) { | |
2554 | assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!"); | |
2555 | if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) { | |
2556 | BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1); | |
2557 | // The check for the unique predecessor is not the best that can be | |
2558 | // done. But it protects efficiently against cases like when SI's | |
2559 | // home block has two successors, Succ and Succ1, and Succ1 predecessor | |
2560 | // of Succ. Then SI can't be replaced by SIOpd because the use that gets | |
2561 | // replaced can be reached on either path. So the uniqueness check | |
2562 | // guarantees that the path all uses of SI (outside SI's parent) are on | |
2563 | // is disjoint from all other paths out of SI. But that information | |
2564 | // is more expensive to compute, and the trade-off here is in favor | |
2565 | // of compile-time. | |
2566 | if (Succ->getUniquePredecessor() && dominatesAllUses(SI, Icmp, Succ)) { | |
2567 | NumSel++; | |
2568 | SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent()); | |
2569 | return true; | |
2570 | } | |
2571 | } | |
2572 | return false; | |
2573 | } | |
2574 | ||
223e47cc LB |
2575 | Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { |
2576 | bool Changed = false; | |
2577 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | |
1a4d82fc JJ |
2578 | unsigned Op0Cplxity = getComplexity(Op0); |
2579 | unsigned Op1Cplxity = getComplexity(Op1); | |
223e47cc LB |
2580 | |
2581 | /// Orders the operands of the compare so that they are listed from most | |
2582 | /// complex to least complex. This puts constants before unary operators, | |
2583 | /// before binary operators. | |
1a4d82fc JJ |
2584 | if (Op0Cplxity < Op1Cplxity || |
2585 | (Op0Cplxity == Op1Cplxity && | |
2586 | swapMayExposeCSEOpportunities(Op0, Op1))) { | |
223e47cc LB |
2587 | I.swapOperands(); |
2588 | std::swap(Op0, Op1); | |
2589 | Changed = true; | |
2590 | } | |
2591 | ||
85aaf69f | 2592 | if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC)) |
223e47cc LB |
2593 | return ReplaceInstUsesWith(I, V); |
2594 | ||
2595 | // comparing -val or val with non-zero is the same as just comparing val | |
2596 | // ie, abs(val) != 0 -> val != 0 | |
2597 | if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) | |
2598 | { | |
2599 | Value *Cond, *SelectTrue, *SelectFalse; | |
2600 | if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), | |
2601 | m_Value(SelectFalse)))) { | |
2602 | if (Value *V = dyn_castNegVal(SelectTrue)) { | |
2603 | if (V == SelectFalse) | |
2604 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); | |
2605 | } | |
2606 | else if (Value *V = dyn_castNegVal(SelectFalse)) { | |
2607 | if (V == SelectTrue) | |
2608 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); | |
2609 | } | |
2610 | } | |
2611 | } | |
2612 | ||
2613 | Type *Ty = Op0->getType(); | |
2614 | ||
2615 | // icmp's with boolean values can always be turned into bitwise operations | |
2616 | if (Ty->isIntegerTy(1)) { | |
2617 | switch (I.getPredicate()) { | |
2618 | default: llvm_unreachable("Invalid icmp instruction!"); | |
2619 | case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) | |
2620 | Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); | |
2621 | return BinaryOperator::CreateNot(Xor); | |
2622 | } | |
2623 | case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B | |
2624 | return BinaryOperator::CreateXor(Op0, Op1); | |
2625 | ||
2626 | case ICmpInst::ICMP_UGT: | |
2627 | std::swap(Op0, Op1); // Change icmp ugt -> icmp ult | |
2628 | // FALL THROUGH | |
2629 | case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B | |
2630 | Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); | |
2631 | return BinaryOperator::CreateAnd(Not, Op1); | |
2632 | } | |
2633 | case ICmpInst::ICMP_SGT: | |
2634 | std::swap(Op0, Op1); // Change icmp sgt -> icmp slt | |
2635 | // FALL THROUGH | |
2636 | case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B | |
2637 | Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); | |
2638 | return BinaryOperator::CreateAnd(Not, Op0); | |
2639 | } | |
2640 | case ICmpInst::ICMP_UGE: | |
2641 | std::swap(Op0, Op1); // Change icmp uge -> icmp ule | |
2642 | // FALL THROUGH | |
2643 | case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B | |
2644 | Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); | |
2645 | return BinaryOperator::CreateOr(Not, Op1); | |
2646 | } | |
2647 | case ICmpInst::ICMP_SGE: | |
2648 | std::swap(Op0, Op1); // Change icmp sge -> icmp sle | |
2649 | // FALL THROUGH | |
2650 | case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B | |
2651 | Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); | |
2652 | return BinaryOperator::CreateOr(Not, Op0); | |
2653 | } | |
2654 | } | |
2655 | } | |
2656 | ||
2657 | unsigned BitWidth = 0; | |
2658 | if (Ty->isIntOrIntVectorTy()) | |
2659 | BitWidth = Ty->getScalarSizeInBits(); | |
1a4d82fc JJ |
2660 | else if (DL) // Pointers require DL info to get their size. |
2661 | BitWidth = DL->getTypeSizeInBits(Ty->getScalarType()); | |
223e47cc LB |
2662 | |
2663 | bool isSignBit = false; | |
2664 | ||
2665 | // See if we are doing a comparison with a constant. | |
2666 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
1a4d82fc | 2667 | Value *A = nullptr, *B = nullptr; |
223e47cc LB |
2668 | |
2669 | // Match the following pattern, which is a common idiom when writing | |
2670 | // overflow-safe integer arithmetic function. The source performs an | |
2671 | // addition in wider type, and explicitly checks for overflow using | |
2672 | // comparisons against INT_MIN and INT_MAX. Simplify this by using the | |
2673 | // sadd_with_overflow intrinsic. | |
2674 | // | |
2675 | // TODO: This could probably be generalized to handle other overflow-safe | |
2676 | // operations if we worked out the formulas to compute the appropriate | |
2677 | // magic constants. | |
2678 | // | |
2679 | // sum = a + b | |
2680 | // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 | |
2681 | { | |
2682 | ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI | |
2683 | if (I.getPredicate() == ICmpInst::ICMP_UGT && | |
2684 | match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) | |
2685 | if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) | |
2686 | return Res; | |
2687 | } | |
2688 | ||
85aaf69f SL |
2689 | // The following transforms are only 'worth it' if the only user of the |
2690 | // subtraction is the icmp. | |
2691 | if (Op0->hasOneUse()) { | |
2692 | // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) | |
2693 | if (I.isEquality() && CI->isZero() && | |
2694 | match(Op0, m_Sub(m_Value(A), m_Value(B)))) | |
2695 | return new ICmpInst(I.getPredicate(), A, B); | |
2696 | ||
2697 | // (icmp sgt (sub nsw A B), -1) -> (icmp sge A, B) | |
2698 | if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isAllOnesValue() && | |
2699 | match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) | |
2700 | return new ICmpInst(ICmpInst::ICMP_SGE, A, B); | |
2701 | ||
2702 | // (icmp sgt (sub nsw A B), 0) -> (icmp sgt A, B) | |
2703 | if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isZero() && | |
2704 | match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) | |
2705 | return new ICmpInst(ICmpInst::ICMP_SGT, A, B); | |
2706 | ||
2707 | // (icmp slt (sub nsw A B), 0) -> (icmp slt A, B) | |
2708 | if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isZero() && | |
2709 | match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) | |
2710 | return new ICmpInst(ICmpInst::ICMP_SLT, A, B); | |
2711 | ||
2712 | // (icmp slt (sub nsw A B), 1) -> (icmp sle A, B) | |
2713 | if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isOne() && | |
2714 | match(Op0, m_NSWSub(m_Value(A), m_Value(B)))) | |
2715 | return new ICmpInst(ICmpInst::ICMP_SLE, A, B); | |
223e47cc LB |
2716 | } |
2717 | ||
2718 | // If we have an icmp le or icmp ge instruction, turn it into the | |
2719 | // appropriate icmp lt or icmp gt instruction. This allows us to rely on | |
2720 | // them being folded in the code below. The SimplifyICmpInst code has | |
2721 | // already handled the edge cases for us, so we just assert on them. | |
2722 | switch (I.getPredicate()) { | |
2723 | default: break; | |
2724 | case ICmpInst::ICMP_ULE: | |
2725 | assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE | |
2726 | return new ICmpInst(ICmpInst::ICMP_ULT, Op0, | |
1a4d82fc | 2727 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2728 | case ICmpInst::ICMP_SLE: |
2729 | assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE | |
2730 | return new ICmpInst(ICmpInst::ICMP_SLT, Op0, | |
1a4d82fc | 2731 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2732 | case ICmpInst::ICMP_UGE: |
2733 | assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE | |
2734 | return new ICmpInst(ICmpInst::ICMP_UGT, Op0, | |
1a4d82fc | 2735 | Builder->getInt(CI->getValue()-1)); |
223e47cc LB |
2736 | case ICmpInst::ICMP_SGE: |
2737 | assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE | |
2738 | return new ICmpInst(ICmpInst::ICMP_SGT, Op0, | |
1a4d82fc JJ |
2739 | Builder->getInt(CI->getValue()-1)); |
2740 | } | |
2741 | ||
1a4d82fc JJ |
2742 | if (I.isEquality()) { |
2743 | ConstantInt *CI2; | |
2744 | if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) || | |
2745 | match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) { | |
85aaf69f SL |
2746 | // (icmp eq/ne (ashr/lshr const2, A), const1) |
2747 | if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2)) | |
2748 | return Inst; | |
2749 | } | |
2750 | if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) { | |
2751 | // (icmp eq/ne (shl const2, A), const1) | |
2752 | if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2)) | |
2753 | return Inst; | |
1a4d82fc | 2754 | } |
223e47cc LB |
2755 | } |
2756 | ||
2757 | // If this comparison is a normal comparison, it demands all | |
2758 | // bits, if it is a sign bit comparison, it only demands the sign bit. | |
2759 | bool UnusedBit; | |
2760 | isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); | |
2761 | } | |
2762 | ||
2763 | // See if we can fold the comparison based on range information we can get | |
2764 | // by checking whether bits are known to be zero or one in the input. | |
2765 | if (BitWidth != 0) { | |
2766 | APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); | |
2767 | APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); | |
2768 | ||
2769 | if (SimplifyDemandedBits(I.getOperandUse(0), | |
2770 | DemandedBitsLHSMask(I, BitWidth, isSignBit), | |
2771 | Op0KnownZero, Op0KnownOne, 0)) | |
2772 | return &I; | |
2773 | if (SimplifyDemandedBits(I.getOperandUse(1), | |
2774 | APInt::getAllOnesValue(BitWidth), | |
2775 | Op1KnownZero, Op1KnownOne, 0)) | |
2776 | return &I; | |
2777 | ||
2778 | // Given the known and unknown bits, compute a range that the LHS could be | |
2779 | // in. Compute the Min, Max and RHS values based on the known bits. For the | |
2780 | // EQ and NE we use unsigned values. | |
2781 | APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); | |
2782 | APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); | |
2783 | if (I.isSigned()) { | |
2784 | ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, | |
2785 | Op0Min, Op0Max); | |
2786 | ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, | |
2787 | Op1Min, Op1Max); | |
2788 | } else { | |
2789 | ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, | |
2790 | Op0Min, Op0Max); | |
2791 | ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, | |
2792 | Op1Min, Op1Max); | |
2793 | } | |
2794 | ||
2795 | // If Min and Max are known to be the same, then SimplifyDemandedBits | |
2796 | // figured out that the LHS is a constant. Just constant fold this now so | |
2797 | // that code below can assume that Min != Max. | |
2798 | if (!isa<Constant>(Op0) && Op0Min == Op0Max) | |
2799 | return new ICmpInst(I.getPredicate(), | |
2800 | ConstantInt::get(Op0->getType(), Op0Min), Op1); | |
2801 | if (!isa<Constant>(Op1) && Op1Min == Op1Max) | |
2802 | return new ICmpInst(I.getPredicate(), Op0, | |
2803 | ConstantInt::get(Op1->getType(), Op1Min)); | |
2804 | ||
2805 | // Based on the range information we know about the LHS, see if we can | |
2806 | // simplify this comparison. For example, (x&4) < 8 is always true. | |
2807 | switch (I.getPredicate()) { | |
2808 | default: llvm_unreachable("Unknown icmp opcode!"); | |
2809 | case ICmpInst::ICMP_EQ: { | |
2810 | if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) | |
2811 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2812 | ||
2813 | // If all bits are known zero except for one, then we know at most one | |
2814 | // bit is set. If the comparison is against zero, then this is a check | |
2815 | // to see if *that* bit is set. | |
2816 | APInt Op0KnownZeroInverted = ~Op0KnownZero; | |
1a4d82fc | 2817 | if (~Op1KnownZero == 0) { |
223e47cc | 2818 | // If the LHS is an AND with the same constant, look through it. |
1a4d82fc JJ |
2819 | Value *LHS = nullptr; |
2820 | ConstantInt *LHSC = nullptr; | |
223e47cc LB |
2821 | if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || |
2822 | LHSC->getValue() != Op0KnownZeroInverted) | |
2823 | LHS = Op0; | |
2824 | ||
2825 | // If the LHS is 1 << x, and we know the result is a power of 2 like 8, | |
2826 | // then turn "((1 << x)&8) == 0" into "x != 3". | |
1a4d82fc JJ |
2827 | // or turn "((1 << x)&7) == 0" into "x > 2". |
2828 | Value *X = nullptr; | |
223e47cc | 2829 | if (match(LHS, m_Shl(m_One(), m_Value(X)))) { |
1a4d82fc JJ |
2830 | APInt ValToCheck = Op0KnownZeroInverted; |
2831 | if (ValToCheck.isPowerOf2()) { | |
2832 | unsigned CmpVal = ValToCheck.countTrailingZeros(); | |
2833 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
2834 | ConstantInt::get(X->getType(), CmpVal)); | |
2835 | } else if ((++ValToCheck).isPowerOf2()) { | |
2836 | unsigned CmpVal = ValToCheck.countTrailingZeros() - 1; | |
2837 | return new ICmpInst(ICmpInst::ICMP_UGT, X, | |
2838 | ConstantInt::get(X->getType(), CmpVal)); | |
2839 | } | |
223e47cc LB |
2840 | } |
2841 | ||
2842 | // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, | |
2843 | // then turn "((8 >>u x)&1) == 0" into "x != 3". | |
2844 | const APInt *CI; | |
2845 | if (Op0KnownZeroInverted == 1 && | |
2846 | match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) | |
2847 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
2848 | ConstantInt::get(X->getType(), | |
2849 | CI->countTrailingZeros())); | |
2850 | } | |
2851 | ||
2852 | break; | |
2853 | } | |
2854 | case ICmpInst::ICMP_NE: { | |
2855 | if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) | |
2856 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2857 | ||
2858 | // If all bits are known zero except for one, then we know at most one | |
2859 | // bit is set. If the comparison is against zero, then this is a check | |
2860 | // to see if *that* bit is set. | |
2861 | APInt Op0KnownZeroInverted = ~Op0KnownZero; | |
1a4d82fc | 2862 | if (~Op1KnownZero == 0) { |
223e47cc | 2863 | // If the LHS is an AND with the same constant, look through it. |
1a4d82fc JJ |
2864 | Value *LHS = nullptr; |
2865 | ConstantInt *LHSC = nullptr; | |
223e47cc LB |
2866 | if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || |
2867 | LHSC->getValue() != Op0KnownZeroInverted) | |
2868 | LHS = Op0; | |
2869 | ||
2870 | // If the LHS is 1 << x, and we know the result is a power of 2 like 8, | |
2871 | // then turn "((1 << x)&8) != 0" into "x == 3". | |
1a4d82fc JJ |
2872 | // or turn "((1 << x)&7) != 0" into "x < 3". |
2873 | Value *X = nullptr; | |
223e47cc | 2874 | if (match(LHS, m_Shl(m_One(), m_Value(X)))) { |
1a4d82fc JJ |
2875 | APInt ValToCheck = Op0KnownZeroInverted; |
2876 | if (ValToCheck.isPowerOf2()) { | |
2877 | unsigned CmpVal = ValToCheck.countTrailingZeros(); | |
2878 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
2879 | ConstantInt::get(X->getType(), CmpVal)); | |
2880 | } else if ((++ValToCheck).isPowerOf2()) { | |
2881 | unsigned CmpVal = ValToCheck.countTrailingZeros(); | |
2882 | return new ICmpInst(ICmpInst::ICMP_ULT, X, | |
2883 | ConstantInt::get(X->getType(), CmpVal)); | |
2884 | } | |
223e47cc LB |
2885 | } |
2886 | ||
2887 | // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, | |
2888 | // then turn "((8 >>u x)&1) != 0" into "x == 3". | |
2889 | const APInt *CI; | |
2890 | if (Op0KnownZeroInverted == 1 && | |
2891 | match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) | |
2892 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
2893 | ConstantInt::get(X->getType(), | |
2894 | CI->countTrailingZeros())); | |
2895 | } | |
2896 | ||
2897 | break; | |
2898 | } | |
2899 | case ICmpInst::ICMP_ULT: | |
2900 | if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) | |
2901 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2902 | if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) | |
2903 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2904 | if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) | |
2905 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2906 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2907 | if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C | |
2908 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2909 | Builder->getInt(CI->getValue()-1)); |
223e47cc LB |
2910 | |
2911 | // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear | |
2912 | if (CI->isMinValue(true)) | |
2913 | return new ICmpInst(ICmpInst::ICMP_SGT, Op0, | |
2914 | Constant::getAllOnesValue(Op0->getType())); | |
2915 | } | |
2916 | break; | |
2917 | case ICmpInst::ICMP_UGT: | |
2918 | if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) | |
2919 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2920 | if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) | |
2921 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2922 | ||
2923 | if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) | |
2924 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2925 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2926 | if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C | |
2927 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2928 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2929 | |
2930 | // (x >u 2147483647) -> (x <s 0) -> true if sign bit set | |
2931 | if (CI->isMaxValue(true)) | |
2932 | return new ICmpInst(ICmpInst::ICMP_SLT, Op0, | |
2933 | Constant::getNullValue(Op0->getType())); | |
2934 | } | |
2935 | break; | |
2936 | case ICmpInst::ICMP_SLT: | |
2937 | if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) | |
2938 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2939 | if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) | |
2940 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2941 | if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) | |
2942 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2943 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2944 | if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C | |
2945 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2946 | Builder->getInt(CI->getValue()-1)); |
223e47cc LB |
2947 | } |
2948 | break; | |
2949 | case ICmpInst::ICMP_SGT: | |
2950 | if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) | |
2951 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2952 | if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) | |
2953 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2954 | ||
2955 | if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) | |
2956 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2957 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2958 | if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C | |
2959 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2960 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2961 | } |
2962 | break; | |
2963 | case ICmpInst::ICMP_SGE: | |
2964 | assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); | |
2965 | if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) | |
2966 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2967 | if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) | |
2968 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2969 | break; | |
2970 | case ICmpInst::ICMP_SLE: | |
2971 | assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); | |
2972 | if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) | |
2973 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2974 | if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) | |
2975 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2976 | break; | |
2977 | case ICmpInst::ICMP_UGE: | |
2978 | assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); | |
2979 | if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) | |
2980 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2981 | if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) | |
2982 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2983 | break; | |
2984 | case ICmpInst::ICMP_ULE: | |
2985 | assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); | |
2986 | if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) | |
2987 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2988 | if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) | |
2989 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2990 | break; | |
2991 | } | |
2992 | ||
2993 | // Turn a signed comparison into an unsigned one if both operands | |
2994 | // are known to have the same sign. | |
2995 | if (I.isSigned() && | |
2996 | ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || | |
2997 | (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) | |
2998 | return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); | |
2999 | } | |
3000 | ||
3001 | // Test if the ICmpInst instruction is used exclusively by a select as | |
3002 | // part of a minimum or maximum operation. If so, refrain from doing | |
3003 | // any other folding. This helps out other analyses which understand | |
3004 | // non-obfuscated minimum and maximum idioms, such as ScalarEvolution | |
3005 | // and CodeGen. And in this case, at least one of the comparison | |
3006 | // operands has at least one user besides the compare (the select), | |
3007 | // which would often largely negate the benefit of folding anyway. | |
3008 | if (I.hasOneUse()) | |
1a4d82fc | 3009 | if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin())) |
223e47cc LB |
3010 | if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || |
3011 | (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) | |
1a4d82fc | 3012 | return nullptr; |
223e47cc LB |
3013 | |
3014 | // See if we are doing a comparison between a constant and an instruction that | |
3015 | // can be folded into the comparison. | |
3016 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
3017 | // Since the RHS is a ConstantInt (CI), if the left hand side is an | |
3018 | // instruction, see if that instruction also has constants so that the | |
3019 | // instruction can be folded into the icmp | |
3020 | if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) | |
3021 | if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) | |
3022 | return Res; | |
3023 | } | |
3024 | ||
3025 | // Handle icmp with constant (but not simple integer constant) RHS | |
3026 | if (Constant *RHSC = dyn_cast<Constant>(Op1)) { | |
3027 | if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) | |
3028 | switch (LHSI->getOpcode()) { | |
3029 | case Instruction::GetElementPtr: | |
3030 | // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null | |
3031 | if (RHSC->isNullValue() && | |
3032 | cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) | |
3033 | return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), | |
3034 | Constant::getNullValue(LHSI->getOperand(0)->getType())); | |
3035 | break; | |
3036 | case Instruction::PHI: | |
3037 | // Only fold icmp into the PHI if the phi and icmp are in the same | |
3038 | // block. If in the same block, we're encouraging jump threading. If | |
3039 | // not, we are just pessimizing the code by making an i1 phi. | |
3040 | if (LHSI->getParent() == I.getParent()) | |
3041 | if (Instruction *NV = FoldOpIntoPhi(I)) | |
3042 | return NV; | |
3043 | break; | |
3044 | case Instruction::Select: { | |
3045 | // If either operand of the select is a constant, we can fold the | |
3046 | // comparison into the select arms, which will cause one to be | |
3047 | // constant folded and the select turned into a bitwise or. | |
1a4d82fc | 3048 | Value *Op1 = nullptr, *Op2 = nullptr; |
85aaf69f SL |
3049 | ConstantInt *CI = 0; |
3050 | if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { | |
223e47cc | 3051 | Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); |
85aaf69f SL |
3052 | CI = dyn_cast<ConstantInt>(Op1); |
3053 | } | |
3054 | if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { | |
223e47cc | 3055 | Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); |
85aaf69f SL |
3056 | CI = dyn_cast<ConstantInt>(Op2); |
3057 | } | |
223e47cc LB |
3058 | |
3059 | // We only want to perform this transformation if it will not lead to | |
3060 | // additional code. This is true if either both sides of the select | |
3061 | // fold to a constant (in which case the icmp is replaced with a select | |
3062 | // which will usually simplify) or this is the only user of the | |
3063 | // select (in which case we are trading a select+icmp for a simpler | |
85aaf69f SL |
3064 | // select+icmp) or all uses of the select can be replaced based on |
3065 | // dominance information ("Global cases"). | |
3066 | bool Transform = false; | |
3067 | if (Op1 && Op2) | |
3068 | Transform = true; | |
3069 | else if (Op1 || Op2) { | |
3070 | // Local case | |
3071 | if (LHSI->hasOneUse()) | |
3072 | Transform = true; | |
3073 | // Global cases | |
3074 | else if (CI && !CI->isZero()) | |
3075 | // When Op1 is constant try replacing select with second operand. | |
3076 | // Otherwise Op2 is constant and try replacing select with first | |
3077 | // operand. | |
3078 | Transform = replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, | |
3079 | Op1 ? 2 : 1); | |
3080 | } | |
3081 | if (Transform) { | |
223e47cc LB |
3082 | if (!Op1) |
3083 | Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), | |
3084 | RHSC, I.getName()); | |
3085 | if (!Op2) | |
3086 | Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), | |
3087 | RHSC, I.getName()); | |
3088 | return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); | |
3089 | } | |
3090 | break; | |
3091 | } | |
3092 | case Instruction::IntToPtr: | |
3093 | // icmp pred inttoptr(X), null -> icmp pred X, 0 | |
1a4d82fc JJ |
3094 | if (RHSC->isNullValue() && DL && |
3095 | DL->getIntPtrType(RHSC->getType()) == | |
223e47cc LB |
3096 | LHSI->getOperand(0)->getType()) |
3097 | return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), | |
3098 | Constant::getNullValue(LHSI->getOperand(0)->getType())); | |
3099 | break; | |
3100 | ||
3101 | case Instruction::Load: | |
3102 | // Try to optimize things like "A[i] > 4" to index computations. | |
3103 | if (GetElementPtrInst *GEP = | |
3104 | dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { | |
3105 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | |
3106 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | |
3107 | !cast<LoadInst>(LHSI)->isVolatile()) | |
3108 | if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) | |
3109 | return Res; | |
3110 | } | |
3111 | break; | |
3112 | } | |
3113 | } | |
3114 | ||
3115 | // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. | |
3116 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) | |
3117 | if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) | |
3118 | return NI; | |
3119 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) | |
3120 | if (Instruction *NI = FoldGEPICmp(GEP, Op0, | |
3121 | ICmpInst::getSwappedPredicate(I.getPredicate()), I)) | |
3122 | return NI; | |
3123 | ||
3124 | // Test to see if the operands of the icmp are casted versions of other | |
3125 | // values. If the ptr->ptr cast can be stripped off both arguments, we do so | |
3126 | // now. | |
3127 | if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { | |
3128 | if (Op0->getType()->isPointerTy() && | |
3129 | (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { | |
3130 | // We keep moving the cast from the left operand over to the right | |
3131 | // operand, where it can often be eliminated completely. | |
3132 | Op0 = CI->getOperand(0); | |
3133 | ||
3134 | // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast | |
3135 | // so eliminate it as well. | |
3136 | if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1)) | |
3137 | Op1 = CI2->getOperand(0); | |
3138 | ||
3139 | // If Op1 is a constant, we can fold the cast into the constant. | |
3140 | if (Op0->getType() != Op1->getType()) { | |
3141 | if (Constant *Op1C = dyn_cast<Constant>(Op1)) { | |
3142 | Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); | |
3143 | } else { | |
3144 | // Otherwise, cast the RHS right before the icmp | |
3145 | Op1 = Builder->CreateBitCast(Op1, Op0->getType()); | |
3146 | } | |
3147 | } | |
3148 | return new ICmpInst(I.getPredicate(), Op0, Op1); | |
3149 | } | |
3150 | } | |
3151 | ||
3152 | if (isa<CastInst>(Op0)) { | |
3153 | // Handle the special case of: icmp (cast bool to X), <cst> | |
3154 | // This comes up when you have code like | |
3155 | // int X = A < B; | |
3156 | // if (X) ... | |
3157 | // For generality, we handle any zero-extension of any operand comparison | |
3158 | // with a constant or another cast from the same type. | |
3159 | if (isa<Constant>(Op1) || isa<CastInst>(Op1)) | |
3160 | if (Instruction *R = visitICmpInstWithCastAndCast(I)) | |
3161 | return R; | |
3162 | } | |
3163 | ||
3164 | // Special logic for binary operators. | |
3165 | BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); | |
3166 | BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); | |
3167 | if (BO0 || BO1) { | |
3168 | CmpInst::Predicate Pred = I.getPredicate(); | |
3169 | bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; | |
3170 | if (BO0 && isa<OverflowingBinaryOperator>(BO0)) | |
3171 | NoOp0WrapProblem = ICmpInst::isEquality(Pred) || | |
3172 | (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || | |
3173 | (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); | |
3174 | if (BO1 && isa<OverflowingBinaryOperator>(BO1)) | |
3175 | NoOp1WrapProblem = ICmpInst::isEquality(Pred) || | |
3176 | (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || | |
3177 | (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); | |
3178 | ||
3179 | // Analyze the case when either Op0 or Op1 is an add instruction. | |
3180 | // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). | |
1a4d82fc | 3181 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; |
223e47cc LB |
3182 | if (BO0 && BO0->getOpcode() == Instruction::Add) |
3183 | A = BO0->getOperand(0), B = BO0->getOperand(1); | |
3184 | if (BO1 && BO1->getOpcode() == Instruction::Add) | |
3185 | C = BO1->getOperand(0), D = BO1->getOperand(1); | |
3186 | ||
85aaf69f SL |
3187 | // icmp (X+cst) < 0 --> X < -cst |
3188 | if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero())) | |
3189 | if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B)) | |
3190 | if (!RHSC->isMinValue(/*isSigned=*/true)) | |
3191 | return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC)); | |
3192 | ||
223e47cc LB |
3193 | // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. |
3194 | if ((A == Op1 || B == Op1) && NoOp0WrapProblem) | |
3195 | return new ICmpInst(Pred, A == Op1 ? B : A, | |
3196 | Constant::getNullValue(Op1->getType())); | |
3197 | ||
3198 | // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. | |
3199 | if ((C == Op0 || D == Op0) && NoOp1WrapProblem) | |
3200 | return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), | |
3201 | C == Op0 ? D : C); | |
3202 | ||
3203 | // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. | |
3204 | if (A && C && (A == C || A == D || B == C || B == D) && | |
3205 | NoOp0WrapProblem && NoOp1WrapProblem && | |
3206 | // Try not to increase register pressure. | |
3207 | BO0->hasOneUse() && BO1->hasOneUse()) { | |
3208 | // Determine Y and Z in the form icmp (X+Y), (X+Z). | |
970d7e83 LB |
3209 | Value *Y, *Z; |
3210 | if (A == C) { | |
3211 | // C + B == C + D -> B == D | |
3212 | Y = B; | |
3213 | Z = D; | |
3214 | } else if (A == D) { | |
3215 | // D + B == C + D -> B == C | |
3216 | Y = B; | |
3217 | Z = C; | |
3218 | } else if (B == C) { | |
3219 | // A + C == C + D -> A == D | |
3220 | Y = A; | |
3221 | Z = D; | |
3222 | } else { | |
3223 | assert(B == D); | |
3224 | // A + D == C + D -> A == C | |
3225 | Y = A; | |
3226 | Z = C; | |
3227 | } | |
223e47cc LB |
3228 | return new ICmpInst(Pred, Y, Z); |
3229 | } | |
3230 | ||
1a4d82fc JJ |
3231 | // icmp slt (X + -1), Y -> icmp sle X, Y |
3232 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && | |
3233 | match(B, m_AllOnes())) | |
3234 | return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); | |
3235 | ||
3236 | // icmp sge (X + -1), Y -> icmp sgt X, Y | |
3237 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && | |
3238 | match(B, m_AllOnes())) | |
3239 | return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); | |
3240 | ||
3241 | // icmp sle (X + 1), Y -> icmp slt X, Y | |
3242 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && | |
3243 | match(B, m_One())) | |
3244 | return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); | |
3245 | ||
3246 | // icmp sgt (X + 1), Y -> icmp sge X, Y | |
3247 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && | |
3248 | match(B, m_One())) | |
3249 | return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); | |
3250 | ||
3251 | // if C1 has greater magnitude than C2: | |
3252 | // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y | |
3253 | // s.t. C3 = C1 - C2 | |
3254 | // | |
3255 | // if C2 has greater magnitude than C1: | |
3256 | // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) | |
3257 | // s.t. C3 = C2 - C1 | |
3258 | if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && | |
3259 | (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) | |
3260 | if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) | |
3261 | if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { | |
3262 | const APInt &AP1 = C1->getValue(); | |
3263 | const APInt &AP2 = C2->getValue(); | |
3264 | if (AP1.isNegative() == AP2.isNegative()) { | |
3265 | APInt AP1Abs = C1->getValue().abs(); | |
3266 | APInt AP2Abs = C2->getValue().abs(); | |
3267 | if (AP1Abs.uge(AP2Abs)) { | |
3268 | ConstantInt *C3 = Builder->getInt(AP1 - AP2); | |
3269 | Value *NewAdd = Builder->CreateNSWAdd(A, C3); | |
3270 | return new ICmpInst(Pred, NewAdd, C); | |
3271 | } else { | |
3272 | ConstantInt *C3 = Builder->getInt(AP2 - AP1); | |
3273 | Value *NewAdd = Builder->CreateNSWAdd(C, C3); | |
3274 | return new ICmpInst(Pred, A, NewAdd); | |
3275 | } | |
3276 | } | |
3277 | } | |
3278 | ||
3279 | ||
223e47cc LB |
3280 | // Analyze the case when either Op0 or Op1 is a sub instruction. |
3281 | // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). | |
1a4d82fc | 3282 | A = nullptr; B = nullptr; C = nullptr; D = nullptr; |
223e47cc LB |
3283 | if (BO0 && BO0->getOpcode() == Instruction::Sub) |
3284 | A = BO0->getOperand(0), B = BO0->getOperand(1); | |
3285 | if (BO1 && BO1->getOpcode() == Instruction::Sub) | |
3286 | C = BO1->getOperand(0), D = BO1->getOperand(1); | |
3287 | ||
3288 | // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. | |
3289 | if (A == Op1 && NoOp0WrapProblem) | |
3290 | return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); | |
3291 | ||
3292 | // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. | |
3293 | if (C == Op0 && NoOp1WrapProblem) | |
3294 | return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); | |
3295 | ||
3296 | // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. | |
3297 | if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && | |
3298 | // Try not to increase register pressure. | |
3299 | BO0->hasOneUse() && BO1->hasOneUse()) | |
3300 | return new ICmpInst(Pred, A, C); | |
3301 | ||
3302 | // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. | |
3303 | if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && | |
3304 | // Try not to increase register pressure. | |
3305 | BO0->hasOneUse() && BO1->hasOneUse()) | |
3306 | return new ICmpInst(Pred, D, B); | |
3307 | ||
1a4d82fc JJ |
3308 | // icmp (0-X) < cst --> x > -cst |
3309 | if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { | |
3310 | Value *X; | |
3311 | if (match(BO0, m_Neg(m_Value(X)))) | |
3312 | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) | |
3313 | if (!RHSC->isMinValue(/*isSigned=*/true)) | |
3314 | return new ICmpInst(I.getSwappedPredicate(), X, | |
3315 | ConstantExpr::getNeg(RHSC)); | |
3316 | } | |
3317 | ||
3318 | BinaryOperator *SRem = nullptr; | |
223e47cc LB |
3319 | // icmp (srem X, Y), Y |
3320 | if (BO0 && BO0->getOpcode() == Instruction::SRem && | |
3321 | Op1 == BO0->getOperand(1)) | |
3322 | SRem = BO0; | |
3323 | // icmp Y, (srem X, Y) | |
3324 | else if (BO1 && BO1->getOpcode() == Instruction::SRem && | |
3325 | Op0 == BO1->getOperand(1)) | |
3326 | SRem = BO1; | |
3327 | if (SRem) { | |
3328 | // We don't check hasOneUse to avoid increasing register pressure because | |
3329 | // the value we use is the same value this instruction was already using. | |
3330 | switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { | |
3331 | default: break; | |
3332 | case ICmpInst::ICMP_EQ: | |
3333 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
3334 | case ICmpInst::ICMP_NE: | |
3335 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
3336 | case ICmpInst::ICMP_SGT: | |
3337 | case ICmpInst::ICMP_SGE: | |
3338 | return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), | |
3339 | Constant::getAllOnesValue(SRem->getType())); | |
3340 | case ICmpInst::ICMP_SLT: | |
3341 | case ICmpInst::ICMP_SLE: | |
3342 | return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), | |
3343 | Constant::getNullValue(SRem->getType())); | |
3344 | } | |
3345 | } | |
3346 | ||
3347 | if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && | |
3348 | BO0->hasOneUse() && BO1->hasOneUse() && | |
3349 | BO0->getOperand(1) == BO1->getOperand(1)) { | |
3350 | switch (BO0->getOpcode()) { | |
3351 | default: break; | |
3352 | case Instruction::Add: | |
3353 | case Instruction::Sub: | |
3354 | case Instruction::Xor: | |
3355 | if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b | |
3356 | return new ICmpInst(I.getPredicate(), BO0->getOperand(0), | |
3357 | BO1->getOperand(0)); | |
3358 | // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b | |
3359 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { | |
3360 | if (CI->getValue().isSignBit()) { | |
3361 | ICmpInst::Predicate Pred = I.isSigned() | |
3362 | ? I.getUnsignedPredicate() | |
3363 | : I.getSignedPredicate(); | |
3364 | return new ICmpInst(Pred, BO0->getOperand(0), | |
3365 | BO1->getOperand(0)); | |
3366 | } | |
3367 | ||
3368 | if (CI->isMaxValue(true)) { | |
3369 | ICmpInst::Predicate Pred = I.isSigned() | |
3370 | ? I.getUnsignedPredicate() | |
3371 | : I.getSignedPredicate(); | |
3372 | Pred = I.getSwappedPredicate(Pred); | |
3373 | return new ICmpInst(Pred, BO0->getOperand(0), | |
3374 | BO1->getOperand(0)); | |
3375 | } | |
3376 | } | |
3377 | break; | |
3378 | case Instruction::Mul: | |
3379 | if (!I.isEquality()) | |
3380 | break; | |
3381 | ||
3382 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { | |
3383 | // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask | |
3384 | // Mask = -1 >> count-trailing-zeros(Cst). | |
3385 | if (!CI->isZero() && !CI->isOne()) { | |
3386 | const APInt &AP = CI->getValue(); | |
3387 | ConstantInt *Mask = ConstantInt::get(I.getContext(), | |
3388 | APInt::getLowBitsSet(AP.getBitWidth(), | |
3389 | AP.getBitWidth() - | |
3390 | AP.countTrailingZeros())); | |
3391 | Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); | |
3392 | Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); | |
3393 | return new ICmpInst(I.getPredicate(), And1, And2); | |
3394 | } | |
3395 | } | |
3396 | break; | |
3397 | case Instruction::UDiv: | |
3398 | case Instruction::LShr: | |
3399 | if (I.isSigned()) | |
3400 | break; | |
3401 | // fall-through | |
3402 | case Instruction::SDiv: | |
3403 | case Instruction::AShr: | |
3404 | if (!BO0->isExact() || !BO1->isExact()) | |
3405 | break; | |
3406 | return new ICmpInst(I.getPredicate(), BO0->getOperand(0), | |
3407 | BO1->getOperand(0)); | |
3408 | case Instruction::Shl: { | |
3409 | bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); | |
3410 | bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); | |
3411 | if (!NUW && !NSW) | |
3412 | break; | |
3413 | if (!NSW && I.isSigned()) | |
3414 | break; | |
3415 | return new ICmpInst(I.getPredicate(), BO0->getOperand(0), | |
3416 | BO1->getOperand(0)); | |
3417 | } | |
3418 | } | |
3419 | } | |
3420 | } | |
3421 | ||
3422 | { Value *A, *B; | |
1a4d82fc JJ |
3423 | // Transform (A & ~B) == 0 --> (A & B) != 0 |
3424 | // and (A & ~B) != 0 --> (A & B) == 0 | |
3425 | // if A is a power of 2. | |
3426 | if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && | |
85aaf69f SL |
3427 | match(Op1, m_Zero()) && |
3428 | isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality()) | |
1a4d82fc JJ |
3429 | return new ICmpInst(I.getInversePredicate(), |
3430 | Builder->CreateAnd(A, B), | |
3431 | Op1); | |
3432 | ||
223e47cc LB |
3433 | // ~x < ~y --> y < x |
3434 | // ~x < cst --> ~cst < x | |
3435 | if (match(Op0, m_Not(m_Value(A)))) { | |
3436 | if (match(Op1, m_Not(m_Value(B)))) | |
3437 | return new ICmpInst(I.getPredicate(), B, A); | |
3438 | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) | |
3439 | return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); | |
3440 | } | |
3441 | ||
3442 | // (a+b) <u a --> llvm.uadd.with.overflow. | |
3443 | // (a+b) <u b --> llvm.uadd.with.overflow. | |
3444 | if (I.getPredicate() == ICmpInst::ICMP_ULT && | |
3445 | match(Op0, m_Add(m_Value(A), m_Value(B))) && | |
3446 | (Op1 == A || Op1 == B)) | |
3447 | if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) | |
3448 | return R; | |
3449 | ||
3450 | // a >u (a+b) --> llvm.uadd.with.overflow. | |
3451 | // b >u (a+b) --> llvm.uadd.with.overflow. | |
3452 | if (I.getPredicate() == ICmpInst::ICMP_UGT && | |
3453 | match(Op1, m_Add(m_Value(A), m_Value(B))) && | |
3454 | (Op0 == A || Op0 == B)) | |
3455 | if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) | |
3456 | return R; | |
1a4d82fc JJ |
3457 | |
3458 | // (zext a) * (zext b) --> llvm.umul.with.overflow. | |
3459 | if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { | |
3460 | if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this)) | |
3461 | return R; | |
3462 | } | |
3463 | if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { | |
3464 | if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this)) | |
3465 | return R; | |
3466 | } | |
223e47cc LB |
3467 | } |
3468 | ||
3469 | if (I.isEquality()) { | |
3470 | Value *A, *B, *C, *D; | |
3471 | ||
3472 | if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { | |
3473 | if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 | |
3474 | Value *OtherVal = A == Op1 ? B : A; | |
3475 | return new ICmpInst(I.getPredicate(), OtherVal, | |
3476 | Constant::getNullValue(A->getType())); | |
3477 | } | |
3478 | ||
3479 | if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { | |
3480 | // A^c1 == C^c2 --> A == C^(c1^c2) | |
3481 | ConstantInt *C1, *C2; | |
3482 | if (match(B, m_ConstantInt(C1)) && | |
3483 | match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { | |
1a4d82fc | 3484 | Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); |
223e47cc LB |
3485 | Value *Xor = Builder->CreateXor(C, NC); |
3486 | return new ICmpInst(I.getPredicate(), A, Xor); | |
3487 | } | |
3488 | ||
3489 | // A^B == A^D -> B == D | |
3490 | if (A == C) return new ICmpInst(I.getPredicate(), B, D); | |
3491 | if (A == D) return new ICmpInst(I.getPredicate(), B, C); | |
3492 | if (B == C) return new ICmpInst(I.getPredicate(), A, D); | |
3493 | if (B == D) return new ICmpInst(I.getPredicate(), A, C); | |
3494 | } | |
3495 | } | |
3496 | ||
3497 | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && | |
3498 | (A == Op0 || B == Op0)) { | |
3499 | // A == (A^B) -> B == 0 | |
3500 | Value *OtherVal = A == Op0 ? B : A; | |
3501 | return new ICmpInst(I.getPredicate(), OtherVal, | |
3502 | Constant::getNullValue(A->getType())); | |
3503 | } | |
3504 | ||
3505 | // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 | |
3506 | if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && | |
3507 | match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { | |
1a4d82fc | 3508 | Value *X = nullptr, *Y = nullptr, *Z = nullptr; |
223e47cc LB |
3509 | |
3510 | if (A == C) { | |
3511 | X = B; Y = D; Z = A; | |
3512 | } else if (A == D) { | |
3513 | X = B; Y = C; Z = A; | |
3514 | } else if (B == C) { | |
3515 | X = A; Y = D; Z = B; | |
3516 | } else if (B == D) { | |
3517 | X = A; Y = C; Z = B; | |
3518 | } | |
3519 | ||
3520 | if (X) { // Build (X^Y) & Z | |
3521 | Op1 = Builder->CreateXor(X, Y); | |
3522 | Op1 = Builder->CreateAnd(Op1, Z); | |
3523 | I.setOperand(0, Op1); | |
3524 | I.setOperand(1, Constant::getNullValue(Op1->getType())); | |
3525 | return &I; | |
3526 | } | |
3527 | } | |
3528 | ||
3529 | // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B) | |
3530 | // and (B & (1<<X)-1) == (zext A) --> A == (trunc B) | |
3531 | ConstantInt *Cst1; | |
3532 | if ((Op0->hasOneUse() && | |
3533 | match(Op0, m_ZExt(m_Value(A))) && | |
3534 | match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || | |
3535 | (Op1->hasOneUse() && | |
3536 | match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && | |
3537 | match(Op1, m_ZExt(m_Value(A))))) { | |
3538 | APInt Pow2 = Cst1->getValue() + 1; | |
3539 | if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && | |
3540 | Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) | |
3541 | return new ICmpInst(I.getPredicate(), A, | |
3542 | Builder->CreateTrunc(B, A->getType())); | |
3543 | } | |
3544 | ||
1a4d82fc JJ |
3545 | // (A >> C) == (B >> C) --> (A^B) u< (1 << C) |
3546 | // For lshr and ashr pairs. | |
3547 | if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && | |
3548 | match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || | |
3549 | (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && | |
3550 | match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { | |
3551 | unsigned TypeBits = Cst1->getBitWidth(); | |
3552 | unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); | |
3553 | if (ShAmt < TypeBits && ShAmt != 0) { | |
3554 | ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE | |
3555 | ? ICmpInst::ICMP_UGE | |
3556 | : ICmpInst::ICMP_ULT; | |
3557 | Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); | |
3558 | APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); | |
3559 | return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); | |
3560 | } | |
3561 | } | |
3562 | ||
223e47cc LB |
3563 | // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to |
3564 | // "icmp (and X, mask), cst" | |
3565 | uint64_t ShAmt = 0; | |
3566 | if (Op0->hasOneUse() && | |
3567 | match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), | |
3568 | m_ConstantInt(ShAmt))))) && | |
3569 | match(Op1, m_ConstantInt(Cst1)) && | |
3570 | // Only do this when A has multiple uses. This is most important to do | |
3571 | // when it exposes other optimizations. | |
3572 | !A->hasOneUse()) { | |
3573 | unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); | |
3574 | ||
3575 | if (ShAmt < ASize) { | |
3576 | APInt MaskV = | |
3577 | APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); | |
3578 | MaskV <<= ShAmt; | |
3579 | ||
3580 | APInt CmpV = Cst1->getValue().zext(ASize); | |
3581 | CmpV <<= ShAmt; | |
3582 | ||
3583 | Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); | |
3584 | return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); | |
3585 | } | |
3586 | } | |
3587 | } | |
3588 | ||
85aaf69f SL |
3589 | // The 'cmpxchg' instruction returns an aggregate containing the old value and |
3590 | // an i1 which indicates whether or not we successfully did the swap. | |
3591 | // | |
3592 | // Replace comparisons between the old value and the expected value with the | |
3593 | // indicator that 'cmpxchg' returns. | |
3594 | // | |
3595 | // N.B. This transform is only valid when the 'cmpxchg' is not permitted to | |
3596 | // spuriously fail. In those cases, the old value may equal the expected | |
3597 | // value but it is possible for the swap to not occur. | |
3598 | if (I.getPredicate() == ICmpInst::ICMP_EQ) | |
3599 | if (auto *EVI = dyn_cast<ExtractValueInst>(Op0)) | |
3600 | if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand())) | |
3601 | if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 && | |
3602 | !ACXI->isWeak()) | |
3603 | return ExtractValueInst::Create(ACXI, 1); | |
3604 | ||
223e47cc LB |
3605 | { |
3606 | Value *X; ConstantInt *Cst; | |
3607 | // icmp X+Cst, X | |
3608 | if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) | |
1a4d82fc | 3609 | return FoldICmpAddOpCst(I, X, Cst, I.getPredicate()); |
223e47cc LB |
3610 | |
3611 | // icmp X, X+Cst | |
3612 | if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) | |
1a4d82fc | 3613 | return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); |
223e47cc | 3614 | } |
1a4d82fc | 3615 | return Changed ? &I : nullptr; |
223e47cc LB |
3616 | } |
3617 | ||
223e47cc | 3618 | /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. |
223e47cc LB |
3619 | Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, |
3620 | Instruction *LHSI, | |
3621 | Constant *RHSC) { | |
1a4d82fc | 3622 | if (!isa<ConstantFP>(RHSC)) return nullptr; |
223e47cc LB |
3623 | const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); |
3624 | ||
3625 | // Get the width of the mantissa. We don't want to hack on conversions that | |
3626 | // might lose information from the integer, e.g. "i64 -> float" | |
3627 | int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); | |
1a4d82fc | 3628 | if (MantissaWidth == -1) return nullptr; // Unknown. |
223e47cc | 3629 | |
85aaf69f SL |
3630 | IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); |
3631 | ||
223e47cc LB |
3632 | // Check to see that the input is converted from an integer type that is small |
3633 | // enough that preserves all bits. TODO: check here for "known" sign bits. | |
3634 | // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. | |
85aaf69f | 3635 | unsigned InputSize = IntTy->getScalarSizeInBits(); |
223e47cc LB |
3636 | |
3637 | // If this is a uitofp instruction, we need an extra bit to hold the sign. | |
3638 | bool LHSUnsigned = isa<UIToFPInst>(LHSI); | |
3639 | if (LHSUnsigned) | |
3640 | ++InputSize; | |
3641 | ||
85aaf69f SL |
3642 | if (I.isEquality()) { |
3643 | FCmpInst::Predicate P = I.getPredicate(); | |
3644 | bool IsExact = false; | |
3645 | APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned); | |
3646 | RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact); | |
3647 | ||
3648 | // If the floating point constant isn't an integer value, we know if we will | |
3649 | // ever compare equal / not equal to it. | |
3650 | if (!IsExact) { | |
3651 | // TODO: Can never be -0.0 and other non-representable values | |
3652 | APFloat RHSRoundInt(RHS); | |
3653 | RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); | |
3654 | if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) { | |
3655 | if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) | |
3656 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
3657 | ||
3658 | assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE); | |
3659 | return ReplaceInstUsesWith(I, Builder->getTrue()); | |
3660 | } | |
3661 | } | |
3662 | ||
3663 | // TODO: If the constant is exactly representable, is it always OK to do | |
3664 | // equality compares as integer? | |
3665 | } | |
3666 | ||
3667 | // Comparisons with zero are a special case where we know we won't lose | |
3668 | // information. | |
3669 | bool IsCmpZero = RHS.isPosZero(); | |
3670 | ||
223e47cc | 3671 | // If the conversion would lose info, don't hack on this. |
85aaf69f | 3672 | if ((int)InputSize > MantissaWidth && !IsCmpZero) |
1a4d82fc | 3673 | return nullptr; |
223e47cc LB |
3674 | |
3675 | // Otherwise, we can potentially simplify the comparison. We know that it | |
3676 | // will always come through as an integer value and we know the constant is | |
3677 | // not a NAN (it would have been previously simplified). | |
3678 | assert(!RHS.isNaN() && "NaN comparison not already folded!"); | |
3679 | ||
3680 | ICmpInst::Predicate Pred; | |
3681 | switch (I.getPredicate()) { | |
3682 | default: llvm_unreachable("Unexpected predicate!"); | |
3683 | case FCmpInst::FCMP_UEQ: | |
3684 | case FCmpInst::FCMP_OEQ: | |
3685 | Pred = ICmpInst::ICMP_EQ; | |
3686 | break; | |
3687 | case FCmpInst::FCMP_UGT: | |
3688 | case FCmpInst::FCMP_OGT: | |
3689 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; | |
3690 | break; | |
3691 | case FCmpInst::FCMP_UGE: | |
3692 | case FCmpInst::FCMP_OGE: | |
3693 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; | |
3694 | break; | |
3695 | case FCmpInst::FCMP_ULT: | |
3696 | case FCmpInst::FCMP_OLT: | |
3697 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; | |
3698 | break; | |
3699 | case FCmpInst::FCMP_ULE: | |
3700 | case FCmpInst::FCMP_OLE: | |
3701 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; | |
3702 | break; | |
3703 | case FCmpInst::FCMP_UNE: | |
3704 | case FCmpInst::FCMP_ONE: | |
3705 | Pred = ICmpInst::ICMP_NE; | |
3706 | break; | |
3707 | case FCmpInst::FCMP_ORD: | |
1a4d82fc | 3708 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc | 3709 | case FCmpInst::FCMP_UNO: |
1a4d82fc | 3710 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3711 | } |
3712 | ||
223e47cc LB |
3713 | // Now we know that the APFloat is a normal number, zero or inf. |
3714 | ||
3715 | // See if the FP constant is too large for the integer. For example, | |
3716 | // comparing an i8 to 300.0. | |
3717 | unsigned IntWidth = IntTy->getScalarSizeInBits(); | |
3718 | ||
3719 | if (!LHSUnsigned) { | |
3720 | // If the RHS value is > SignedMax, fold the comparison. This handles +INF | |
3721 | // and large values. | |
1a4d82fc | 3722 | APFloat SMax(RHS.getSemantics()); |
223e47cc LB |
3723 | SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, |
3724 | APFloat::rmNearestTiesToEven); | |
3725 | if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 | |
3726 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || | |
3727 | Pred == ICmpInst::ICMP_SLE) | |
1a4d82fc JJ |
3728 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3729 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3730 | } |
3731 | } else { | |
3732 | // If the RHS value is > UnsignedMax, fold the comparison. This handles | |
3733 | // +INF and large values. | |
1a4d82fc | 3734 | APFloat UMax(RHS.getSemantics()); |
223e47cc LB |
3735 | UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, |
3736 | APFloat::rmNearestTiesToEven); | |
3737 | if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 | |
3738 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || | |
3739 | Pred == ICmpInst::ICMP_ULE) | |
1a4d82fc JJ |
3740 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3741 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3742 | } |
3743 | } | |
3744 | ||
3745 | if (!LHSUnsigned) { | |
3746 | // See if the RHS value is < SignedMin. | |
1a4d82fc | 3747 | APFloat SMin(RHS.getSemantics()); |
223e47cc LB |
3748 | SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, |
3749 | APFloat::rmNearestTiesToEven); | |
3750 | if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 | |
3751 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || | |
3752 | Pred == ICmpInst::ICMP_SGE) | |
1a4d82fc JJ |
3753 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3754 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3755 | } |
3756 | } else { | |
3757 | // See if the RHS value is < UnsignedMin. | |
1a4d82fc | 3758 | APFloat SMin(RHS.getSemantics()); |
223e47cc LB |
3759 | SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, |
3760 | APFloat::rmNearestTiesToEven); | |
3761 | if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 | |
3762 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || | |
3763 | Pred == ICmpInst::ICMP_UGE) | |
1a4d82fc JJ |
3764 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3765 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3766 | } |
3767 | } | |
3768 | ||
3769 | // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or | |
3770 | // [0, UMAX], but it may still be fractional. See if it is fractional by | |
3771 | // casting the FP value to the integer value and back, checking for equality. | |
3772 | // Don't do this for zero, because -0.0 is not fractional. | |
3773 | Constant *RHSInt = LHSUnsigned | |
3774 | ? ConstantExpr::getFPToUI(RHSC, IntTy) | |
3775 | : ConstantExpr::getFPToSI(RHSC, IntTy); | |
3776 | if (!RHS.isZero()) { | |
3777 | bool Equal = LHSUnsigned | |
3778 | ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC | |
3779 | : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; | |
3780 | if (!Equal) { | |
3781 | // If we had a comparison against a fractional value, we have to adjust | |
3782 | // the compare predicate and sometimes the value. RHSC is rounded towards | |
3783 | // zero at this point. | |
3784 | switch (Pred) { | |
3785 | default: llvm_unreachable("Unexpected integer comparison!"); | |
3786 | case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true | |
1a4d82fc | 3787 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc | 3788 | case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false |
1a4d82fc | 3789 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3790 | case ICmpInst::ICMP_ULE: |
3791 | // (float)int <= 4.4 --> int <= 4 | |
3792 | // (float)int <= -4.4 --> false | |
3793 | if (RHS.isNegative()) | |
1a4d82fc | 3794 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3795 | break; |
3796 | case ICmpInst::ICMP_SLE: | |
3797 | // (float)int <= 4.4 --> int <= 4 | |
3798 | // (float)int <= -4.4 --> int < -4 | |
3799 | if (RHS.isNegative()) | |
3800 | Pred = ICmpInst::ICMP_SLT; | |
3801 | break; | |
3802 | case ICmpInst::ICMP_ULT: | |
3803 | // (float)int < -4.4 --> false | |
3804 | // (float)int < 4.4 --> int <= 4 | |
3805 | if (RHS.isNegative()) | |
1a4d82fc | 3806 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3807 | Pred = ICmpInst::ICMP_ULE; |
3808 | break; | |
3809 | case ICmpInst::ICMP_SLT: | |
3810 | // (float)int < -4.4 --> int < -4 | |
3811 | // (float)int < 4.4 --> int <= 4 | |
3812 | if (!RHS.isNegative()) | |
3813 | Pred = ICmpInst::ICMP_SLE; | |
3814 | break; | |
3815 | case ICmpInst::ICMP_UGT: | |
3816 | // (float)int > 4.4 --> int > 4 | |
3817 | // (float)int > -4.4 --> true | |
3818 | if (RHS.isNegative()) | |
1a4d82fc | 3819 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc LB |
3820 | break; |
3821 | case ICmpInst::ICMP_SGT: | |
3822 | // (float)int > 4.4 --> int > 4 | |
3823 | // (float)int > -4.4 --> int >= -4 | |
3824 | if (RHS.isNegative()) | |
3825 | Pred = ICmpInst::ICMP_SGE; | |
3826 | break; | |
3827 | case ICmpInst::ICMP_UGE: | |
3828 | // (float)int >= -4.4 --> true | |
3829 | // (float)int >= 4.4 --> int > 4 | |
3830 | if (RHS.isNegative()) | |
1a4d82fc | 3831 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc LB |
3832 | Pred = ICmpInst::ICMP_UGT; |
3833 | break; | |
3834 | case ICmpInst::ICMP_SGE: | |
3835 | // (float)int >= -4.4 --> int >= -4 | |
3836 | // (float)int >= 4.4 --> int > 4 | |
3837 | if (!RHS.isNegative()) | |
3838 | Pred = ICmpInst::ICMP_SGT; | |
3839 | break; | |
3840 | } | |
3841 | } | |
3842 | } | |
3843 | ||
3844 | // Lower this FP comparison into an appropriate integer version of the | |
3845 | // comparison. | |
3846 | return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); | |
3847 | } | |
3848 | ||
3849 | Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { | |
3850 | bool Changed = false; | |
3851 | ||
3852 | /// Orders the operands of the compare so that they are listed from most | |
3853 | /// complex to least complex. This puts constants before unary operators, | |
3854 | /// before binary operators. | |
3855 | if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { | |
3856 | I.swapOperands(); | |
3857 | Changed = true; | |
3858 | } | |
3859 | ||
3860 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | |
3861 | ||
85aaf69f | 3862 | if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC)) |
223e47cc LB |
3863 | return ReplaceInstUsesWith(I, V); |
3864 | ||
3865 | // Simplify 'fcmp pred X, X' | |
3866 | if (Op0 == Op1) { | |
3867 | switch (I.getPredicate()) { | |
3868 | default: llvm_unreachable("Unknown predicate!"); | |
3869 | case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) | |
3870 | case FCmpInst::FCMP_ULT: // True if unordered or less than | |
3871 | case FCmpInst::FCMP_UGT: // True if unordered or greater than | |
3872 | case FCmpInst::FCMP_UNE: // True if unordered or not equal | |
3873 | // Canonicalize these to be 'fcmp uno %X, 0.0'. | |
3874 | I.setPredicate(FCmpInst::FCMP_UNO); | |
3875 | I.setOperand(1, Constant::getNullValue(Op0->getType())); | |
3876 | return &I; | |
3877 | ||
3878 | case FCmpInst::FCMP_ORD: // True if ordered (no nans) | |
3879 | case FCmpInst::FCMP_OEQ: // True if ordered and equal | |
3880 | case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal | |
3881 | case FCmpInst::FCMP_OLE: // True if ordered and less than or equal | |
3882 | // Canonicalize these to be 'fcmp ord %X, 0.0'. | |
3883 | I.setPredicate(FCmpInst::FCMP_ORD); | |
3884 | I.setOperand(1, Constant::getNullValue(Op0->getType())); | |
3885 | return &I; | |
3886 | } | |
3887 | } | |
3888 | ||
3889 | // Handle fcmp with constant RHS | |
3890 | if (Constant *RHSC = dyn_cast<Constant>(Op1)) { | |
3891 | if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) | |
3892 | switch (LHSI->getOpcode()) { | |
3893 | case Instruction::FPExt: { | |
3894 | // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless | |
3895 | FPExtInst *LHSExt = cast<FPExtInst>(LHSI); | |
3896 | ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); | |
3897 | if (!RHSF) | |
3898 | break; | |
3899 | ||
223e47cc LB |
3900 | const fltSemantics *Sem; |
3901 | // FIXME: This shouldn't be here. | |
3902 | if (LHSExt->getSrcTy()->isHalfTy()) | |
3903 | Sem = &APFloat::IEEEhalf; | |
3904 | else if (LHSExt->getSrcTy()->isFloatTy()) | |
3905 | Sem = &APFloat::IEEEsingle; | |
3906 | else if (LHSExt->getSrcTy()->isDoubleTy()) | |
3907 | Sem = &APFloat::IEEEdouble; | |
3908 | else if (LHSExt->getSrcTy()->isFP128Ty()) | |
3909 | Sem = &APFloat::IEEEquad; | |
3910 | else if (LHSExt->getSrcTy()->isX86_FP80Ty()) | |
3911 | Sem = &APFloat::x87DoubleExtended; | |
970d7e83 LB |
3912 | else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) |
3913 | Sem = &APFloat::PPCDoubleDouble; | |
223e47cc LB |
3914 | else |
3915 | break; | |
3916 | ||
3917 | bool Lossy; | |
3918 | APFloat F = RHSF->getValueAPF(); | |
3919 | F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); | |
3920 | ||
3921 | // Avoid lossy conversions and denormals. Zero is a special case | |
3922 | // that's OK to convert. | |
3923 | APFloat Fabs = F; | |
3924 | Fabs.clearSign(); | |
3925 | if (!Lossy && | |
3926 | ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != | |
3927 | APFloat::cmpLessThan) || Fabs.isZero())) | |
3928 | ||
3929 | return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), | |
3930 | ConstantFP::get(RHSC->getContext(), F)); | |
3931 | break; | |
3932 | } | |
3933 | case Instruction::PHI: | |
3934 | // Only fold fcmp into the PHI if the phi and fcmp are in the same | |
3935 | // block. If in the same block, we're encouraging jump threading. If | |
3936 | // not, we are just pessimizing the code by making an i1 phi. | |
3937 | if (LHSI->getParent() == I.getParent()) | |
3938 | if (Instruction *NV = FoldOpIntoPhi(I)) | |
3939 | return NV; | |
3940 | break; | |
3941 | case Instruction::SIToFP: | |
3942 | case Instruction::UIToFP: | |
3943 | if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) | |
3944 | return NV; | |
3945 | break; | |
223e47cc LB |
3946 | case Instruction::FSub: { |
3947 | // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C | |
3948 | Value *Op; | |
3949 | if (match(LHSI, m_FNeg(m_Value(Op)))) | |
3950 | return new FCmpInst(I.getSwappedPredicate(), Op, | |
3951 | ConstantExpr::getFNeg(RHSC)); | |
3952 | break; | |
3953 | } | |
3954 | case Instruction::Load: | |
3955 | if (GetElementPtrInst *GEP = | |
3956 | dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { | |
3957 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | |
3958 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | |
3959 | !cast<LoadInst>(LHSI)->isVolatile()) | |
3960 | if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) | |
3961 | return Res; | |
3962 | } | |
3963 | break; | |
3964 | case Instruction::Call: { | |
85aaf69f SL |
3965 | if (!RHSC->isNullValue()) |
3966 | break; | |
3967 | ||
223e47cc | 3968 | CallInst *CI = cast<CallInst>(LHSI); |
85aaf69f SL |
3969 | const Function *F = CI->getCalledFunction(); |
3970 | if (!F) | |
3971 | break; | |
3972 | ||
223e47cc | 3973 | // Various optimization for fabs compared with zero. |
85aaf69f SL |
3974 | LibFunc::Func Func; |
3975 | if (F->getIntrinsicID() == Intrinsic::fabs || | |
3976 | (TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && | |
3977 | (Func == LibFunc::fabs || Func == LibFunc::fabsf || | |
3978 | Func == LibFunc::fabsl))) { | |
3979 | switch (I.getPredicate()) { | |
3980 | default: | |
3981 | break; | |
223e47cc | 3982 | // fabs(x) < 0 --> false |
85aaf69f SL |
3983 | case FCmpInst::FCMP_OLT: |
3984 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc | 3985 | // fabs(x) > 0 --> x != 0 |
85aaf69f SL |
3986 | case FCmpInst::FCMP_OGT: |
3987 | return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), RHSC); | |
223e47cc | 3988 | // fabs(x) <= 0 --> x == 0 |
85aaf69f SL |
3989 | case FCmpInst::FCMP_OLE: |
3990 | return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), RHSC); | |
223e47cc | 3991 | // fabs(x) >= 0 --> !isnan(x) |
85aaf69f SL |
3992 | case FCmpInst::FCMP_OGE: |
3993 | return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), RHSC); | |
223e47cc LB |
3994 | // fabs(x) == 0 --> x == 0 |
3995 | // fabs(x) != 0 --> x != 0 | |
85aaf69f SL |
3996 | case FCmpInst::FCMP_OEQ: |
3997 | case FCmpInst::FCMP_UEQ: | |
3998 | case FCmpInst::FCMP_ONE: | |
3999 | case FCmpInst::FCMP_UNE: | |
4000 | return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), RHSC); | |
223e47cc LB |
4001 | } |
4002 | } | |
4003 | } | |
4004 | } | |
4005 | } | |
4006 | ||
4007 | // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y | |
4008 | Value *X, *Y; | |
4009 | if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) | |
4010 | return new FCmpInst(I.getSwappedPredicate(), X, Y); | |
4011 | ||
4012 | // fcmp (fpext x), (fpext y) -> fcmp x, y | |
4013 | if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) | |
4014 | if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) | |
4015 | if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) | |
4016 | return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), | |
4017 | RHSExt->getOperand(0)); | |
4018 | ||
1a4d82fc | 4019 | return Changed ? &I : nullptr; |
223e47cc | 4020 | } |