]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===- InstCombineCompares.cpp --------------------------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements the visitICmp and visitFCmp functions. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | #include "InstCombine.h" | |
223e47cc LB |
15 | #include "llvm/Analysis/ConstantFolding.h" |
16 | #include "llvm/Analysis/InstructionSimplify.h" | |
17 | #include "llvm/Analysis/MemoryBuiltins.h" | |
1a4d82fc | 18 | #include "llvm/IR/ConstantRange.h" |
970d7e83 | 19 | #include "llvm/IR/DataLayout.h" |
1a4d82fc | 20 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
970d7e83 | 21 | #include "llvm/IR/IntrinsicInst.h" |
1a4d82fc | 22 | #include "llvm/IR/PatternMatch.h" |
970d7e83 | 23 | #include "llvm/Target/TargetLibraryInfo.h" |
223e47cc LB |
24 | using namespace llvm; |
25 | using namespace PatternMatch; | |
26 | ||
1a4d82fc JJ |
27 | #define DEBUG_TYPE "instcombine" |
28 | ||
223e47cc LB |
29 | static ConstantInt *getOne(Constant *C) { |
30 | return ConstantInt::get(cast<IntegerType>(C->getType()), 1); | |
31 | } | |
32 | ||
223e47cc LB |
33 | static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { |
34 | return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); | |
35 | } | |
36 | ||
37 | static bool HasAddOverflow(ConstantInt *Result, | |
38 | ConstantInt *In1, ConstantInt *In2, | |
39 | bool IsSigned) { | |
40 | if (!IsSigned) | |
41 | return Result->getValue().ult(In1->getValue()); | |
42 | ||
43 | if (In2->isNegative()) | |
44 | return Result->getValue().sgt(In1->getValue()); | |
45 | return Result->getValue().slt(In1->getValue()); | |
46 | } | |
47 | ||
48 | /// AddWithOverflow - Compute Result = In1+In2, returning true if the result | |
49 | /// overflowed for this type. | |
50 | static bool AddWithOverflow(Constant *&Result, Constant *In1, | |
51 | Constant *In2, bool IsSigned = false) { | |
52 | Result = ConstantExpr::getAdd(In1, In2); | |
53 | ||
54 | if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { | |
55 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { | |
56 | Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); | |
57 | if (HasAddOverflow(ExtractElement(Result, Idx), | |
58 | ExtractElement(In1, Idx), | |
59 | ExtractElement(In2, Idx), | |
60 | IsSigned)) | |
61 | return true; | |
62 | } | |
63 | return false; | |
64 | } | |
65 | ||
66 | return HasAddOverflow(cast<ConstantInt>(Result), | |
67 | cast<ConstantInt>(In1), cast<ConstantInt>(In2), | |
68 | IsSigned); | |
69 | } | |
70 | ||
71 | static bool HasSubOverflow(ConstantInt *Result, | |
72 | ConstantInt *In1, ConstantInt *In2, | |
73 | bool IsSigned) { | |
74 | if (!IsSigned) | |
75 | return Result->getValue().ugt(In1->getValue()); | |
76 | ||
77 | if (In2->isNegative()) | |
78 | return Result->getValue().slt(In1->getValue()); | |
79 | ||
80 | return Result->getValue().sgt(In1->getValue()); | |
81 | } | |
82 | ||
83 | /// SubWithOverflow - Compute Result = In1-In2, returning true if the result | |
84 | /// overflowed for this type. | |
85 | static bool SubWithOverflow(Constant *&Result, Constant *In1, | |
86 | Constant *In2, bool IsSigned = false) { | |
87 | Result = ConstantExpr::getSub(In1, In2); | |
88 | ||
89 | if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { | |
90 | for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { | |
91 | Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); | |
92 | if (HasSubOverflow(ExtractElement(Result, Idx), | |
93 | ExtractElement(In1, Idx), | |
94 | ExtractElement(In2, Idx), | |
95 | IsSigned)) | |
96 | return true; | |
97 | } | |
98 | return false; | |
99 | } | |
100 | ||
101 | return HasSubOverflow(cast<ConstantInt>(Result), | |
102 | cast<ConstantInt>(In1), cast<ConstantInt>(In2), | |
103 | IsSigned); | |
104 | } | |
105 | ||
106 | /// isSignBitCheck - Given an exploded icmp instruction, return true if the | |
107 | /// comparison only checks the sign bit. If it only checks the sign bit, set | |
108 | /// TrueIfSigned if the result of the comparison is true when the input value is | |
109 | /// signed. | |
110 | static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, | |
111 | bool &TrueIfSigned) { | |
112 | switch (pred) { | |
113 | case ICmpInst::ICMP_SLT: // True if LHS s< 0 | |
114 | TrueIfSigned = true; | |
115 | return RHS->isZero(); | |
116 | case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 | |
117 | TrueIfSigned = true; | |
118 | return RHS->isAllOnesValue(); | |
119 | case ICmpInst::ICMP_SGT: // True if LHS s> -1 | |
120 | TrueIfSigned = false; | |
121 | return RHS->isAllOnesValue(); | |
122 | case ICmpInst::ICMP_UGT: | |
123 | // True if LHS u> RHS and RHS == high-bit-mask - 1 | |
124 | TrueIfSigned = true; | |
125 | return RHS->isMaxValue(true); | |
126 | case ICmpInst::ICMP_UGE: | |
127 | // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) | |
128 | TrueIfSigned = true; | |
129 | return RHS->getValue().isSignBit(); | |
130 | default: | |
131 | return false; | |
132 | } | |
133 | } | |
134 | ||
1a4d82fc JJ |
135 | /// Returns true if the exploded icmp can be expressed as a signed comparison |
136 | /// to zero and updates the predicate accordingly. | |
137 | /// The signedness of the comparison is preserved. | |
138 | static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { | |
139 | if (!ICmpInst::isSigned(pred)) | |
140 | return false; | |
141 | ||
142 | if (RHS->isZero()) | |
143 | return ICmpInst::isRelational(pred); | |
144 | ||
145 | if (RHS->isOne()) { | |
146 | if (pred == ICmpInst::ICMP_SLT) { | |
147 | pred = ICmpInst::ICMP_SLE; | |
148 | return true; | |
149 | } | |
150 | } else if (RHS->isAllOnesValue()) { | |
151 | if (pred == ICmpInst::ICMP_SGT) { | |
152 | pred = ICmpInst::ICMP_SGE; | |
153 | return true; | |
154 | } | |
155 | } | |
156 | ||
157 | return false; | |
158 | } | |
159 | ||
223e47cc LB |
160 | // isHighOnes - Return true if the constant is of the form 1+0+. |
161 | // This is the same as lowones(~X). | |
162 | static bool isHighOnes(const ConstantInt *CI) { | |
163 | return (~CI->getValue() + 1).isPowerOf2(); | |
164 | } | |
165 | ||
166 | /// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a | |
167 | /// set of known zero and one bits, compute the maximum and minimum values that | |
168 | /// could have the specified known zero and known one bits, returning them in | |
169 | /// min/max. | |
170 | static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, | |
171 | const APInt& KnownOne, | |
172 | APInt& Min, APInt& Max) { | |
173 | assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && | |
174 | KnownZero.getBitWidth() == Min.getBitWidth() && | |
175 | KnownZero.getBitWidth() == Max.getBitWidth() && | |
176 | "KnownZero, KnownOne and Min, Max must have equal bitwidth."); | |
177 | APInt UnknownBits = ~(KnownZero|KnownOne); | |
178 | ||
179 | // The minimum value is when all unknown bits are zeros, EXCEPT for the sign | |
180 | // bit if it is unknown. | |
181 | Min = KnownOne; | |
182 | Max = KnownOne|UnknownBits; | |
183 | ||
184 | if (UnknownBits.isNegative()) { // Sign bit is unknown | |
185 | Min.setBit(Min.getBitWidth()-1); | |
186 | Max.clearBit(Max.getBitWidth()-1); | |
187 | } | |
188 | } | |
189 | ||
190 | // ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and | |
191 | // a set of known zero and one bits, compute the maximum and minimum values that | |
192 | // could have the specified known zero and known one bits, returning them in | |
193 | // min/max. | |
194 | static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, | |
195 | const APInt &KnownOne, | |
196 | APInt &Min, APInt &Max) { | |
197 | assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && | |
198 | KnownZero.getBitWidth() == Min.getBitWidth() && | |
199 | KnownZero.getBitWidth() == Max.getBitWidth() && | |
200 | "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); | |
201 | APInt UnknownBits = ~(KnownZero|KnownOne); | |
202 | ||
203 | // The minimum value is when the unknown bits are all zeros. | |
204 | Min = KnownOne; | |
205 | // The maximum value is when the unknown bits are all ones. | |
206 | Max = KnownOne|UnknownBits; | |
207 | } | |
208 | ||
209 | ||
210 | ||
211 | /// FoldCmpLoadFromIndexedGlobal - Called we see this pattern: | |
212 | /// cmp pred (load (gep GV, ...)), cmpcst | |
213 | /// where GV is a global variable with a constant initializer. Try to simplify | |
214 | /// this into some simple computation that does not need the load. For example | |
215 | /// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3". | |
216 | /// | |
217 | /// If AndCst is non-null, then the loaded value is masked with that constant | |
218 | /// before doing the comparison. This handles cases like "A[i]&4 == 0". | |
219 | Instruction *InstCombiner:: | |
220 | FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, | |
221 | CmpInst &ICI, ConstantInt *AndCst) { | |
222 | // We need TD information to know the pointer size unless this is inbounds. | |
1a4d82fc JJ |
223 | if (!GEP->isInBounds() && !DL) |
224 | return nullptr; | |
223e47cc LB |
225 | |
226 | Constant *Init = GV->getInitializer(); | |
227 | if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) | |
1a4d82fc JJ |
228 | return nullptr; |
229 | ||
223e47cc | 230 | uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); |
1a4d82fc | 231 | if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays. |
223e47cc LB |
232 | |
233 | // There are many forms of this optimization we can handle, for now, just do | |
234 | // the simple index into a single-dimensional array. | |
235 | // | |
236 | // Require: GEP GV, 0, i {{, constant indices}} | |
237 | if (GEP->getNumOperands() < 3 || | |
238 | !isa<ConstantInt>(GEP->getOperand(1)) || | |
239 | !cast<ConstantInt>(GEP->getOperand(1))->isZero() || | |
240 | isa<Constant>(GEP->getOperand(2))) | |
1a4d82fc | 241 | return nullptr; |
223e47cc LB |
242 | |
243 | // Check that indices after the variable are constants and in-range for the | |
244 | // type they index. Collect the indices. This is typically for arrays of | |
245 | // structs. | |
246 | SmallVector<unsigned, 4> LaterIndices; | |
247 | ||
248 | Type *EltTy = Init->getType()->getArrayElementType(); | |
249 | for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { | |
250 | ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); | |
1a4d82fc | 251 | if (!Idx) return nullptr; // Variable index. |
223e47cc LB |
252 | |
253 | uint64_t IdxVal = Idx->getZExtValue(); | |
1a4d82fc | 254 | if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. |
223e47cc LB |
255 | |
256 | if (StructType *STy = dyn_cast<StructType>(EltTy)) | |
257 | EltTy = STy->getElementType(IdxVal); | |
258 | else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { | |
1a4d82fc | 259 | if (IdxVal >= ATy->getNumElements()) return nullptr; |
223e47cc LB |
260 | EltTy = ATy->getElementType(); |
261 | } else { | |
1a4d82fc | 262 | return nullptr; // Unknown type. |
223e47cc LB |
263 | } |
264 | ||
265 | LaterIndices.push_back(IdxVal); | |
266 | } | |
267 | ||
268 | enum { Overdefined = -3, Undefined = -2 }; | |
269 | ||
270 | // Variables for our state machines. | |
271 | ||
272 | // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form | |
273 | // "i == 47 | i == 87", where 47 is the first index the condition is true for, | |
274 | // and 87 is the second (and last) index. FirstTrueElement is -2 when | |
275 | // undefined, otherwise set to the first true element. SecondTrueElement is | |
276 | // -2 when undefined, -3 when overdefined and >= 0 when that index is true. | |
277 | int FirstTrueElement = Undefined, SecondTrueElement = Undefined; | |
278 | ||
279 | // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the | |
280 | // form "i != 47 & i != 87". Same state transitions as for true elements. | |
281 | int FirstFalseElement = Undefined, SecondFalseElement = Undefined; | |
282 | ||
283 | /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these | |
284 | /// define a state machine that triggers for ranges of values that the index | |
285 | /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. | |
286 | /// This is -2 when undefined, -3 when overdefined, and otherwise the last | |
287 | /// index in the range (inclusive). We use -2 for undefined here because we | |
288 | /// use relative comparisons and don't want 0-1 to match -1. | |
289 | int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; | |
290 | ||
291 | // MagicBitvector - This is a magic bitvector where we set a bit if the | |
292 | // comparison is true for element 'i'. If there are 64 elements or less in | |
293 | // the array, this will fully represent all the comparison results. | |
294 | uint64_t MagicBitvector = 0; | |
295 | ||
296 | ||
297 | // Scan the array and see if one of our patterns matches. | |
298 | Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); | |
299 | for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { | |
300 | Constant *Elt = Init->getAggregateElement(i); | |
1a4d82fc | 301 | if (!Elt) return nullptr; |
223e47cc LB |
302 | |
303 | // If this is indexing an array of structures, get the structure element. | |
304 | if (!LaterIndices.empty()) | |
305 | Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); | |
306 | ||
307 | // If the element is masked, handle it. | |
308 | if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); | |
309 | ||
310 | // Find out if the comparison would be true or false for the i'th element. | |
311 | Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, | |
1a4d82fc | 312 | CompareRHS, DL, TLI); |
223e47cc LB |
313 | // If the result is undef for this element, ignore it. |
314 | if (isa<UndefValue>(C)) { | |
315 | // Extend range state machines to cover this element in case there is an | |
316 | // undef in the middle of the range. | |
317 | if (TrueRangeEnd == (int)i-1) | |
318 | TrueRangeEnd = i; | |
319 | if (FalseRangeEnd == (int)i-1) | |
320 | FalseRangeEnd = i; | |
321 | continue; | |
322 | } | |
323 | ||
324 | // If we can't compute the result for any of the elements, we have to give | |
325 | // up evaluating the entire conditional. | |
1a4d82fc | 326 | if (!isa<ConstantInt>(C)) return nullptr; |
223e47cc LB |
327 | |
328 | // Otherwise, we know if the comparison is true or false for this element, | |
329 | // update our state machines. | |
330 | bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); | |
331 | ||
332 | // State machine for single/double/range index comparison. | |
333 | if (IsTrueForElt) { | |
334 | // Update the TrueElement state machine. | |
335 | if (FirstTrueElement == Undefined) | |
336 | FirstTrueElement = TrueRangeEnd = i; // First true element. | |
337 | else { | |
338 | // Update double-compare state machine. | |
339 | if (SecondTrueElement == Undefined) | |
340 | SecondTrueElement = i; | |
341 | else | |
342 | SecondTrueElement = Overdefined; | |
343 | ||
344 | // Update range state machine. | |
345 | if (TrueRangeEnd == (int)i-1) | |
346 | TrueRangeEnd = i; | |
347 | else | |
348 | TrueRangeEnd = Overdefined; | |
349 | } | |
350 | } else { | |
351 | // Update the FalseElement state machine. | |
352 | if (FirstFalseElement == Undefined) | |
353 | FirstFalseElement = FalseRangeEnd = i; // First false element. | |
354 | else { | |
355 | // Update double-compare state machine. | |
356 | if (SecondFalseElement == Undefined) | |
357 | SecondFalseElement = i; | |
358 | else | |
359 | SecondFalseElement = Overdefined; | |
360 | ||
361 | // Update range state machine. | |
362 | if (FalseRangeEnd == (int)i-1) | |
363 | FalseRangeEnd = i; | |
364 | else | |
365 | FalseRangeEnd = Overdefined; | |
366 | } | |
367 | } | |
368 | ||
369 | ||
370 | // If this element is in range, update our magic bitvector. | |
371 | if (i < 64 && IsTrueForElt) | |
372 | MagicBitvector |= 1ULL << i; | |
373 | ||
374 | // If all of our states become overdefined, bail out early. Since the | |
375 | // predicate is expensive, only check it every 8 elements. This is only | |
376 | // really useful for really huge arrays. | |
377 | if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && | |
378 | SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && | |
379 | FalseRangeEnd == Overdefined) | |
1a4d82fc | 380 | return nullptr; |
223e47cc LB |
381 | } |
382 | ||
383 | // Now that we've scanned the entire array, emit our new comparison(s). We | |
384 | // order the state machines in complexity of the generated code. | |
385 | Value *Idx = GEP->getOperand(2); | |
386 | ||
387 | // If the index is larger than the pointer size of the target, truncate the | |
388 | // index down like the GEP would do implicitly. We don't have to do this for | |
389 | // an inbounds GEP because the index can't be out of range. | |
1a4d82fc JJ |
390 | if (!GEP->isInBounds()) { |
391 | Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); | |
392 | unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); | |
393 | if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) | |
394 | Idx = Builder->CreateTrunc(Idx, IntPtrTy); | |
395 | } | |
223e47cc LB |
396 | |
397 | // If the comparison is only true for one or two elements, emit direct | |
398 | // comparisons. | |
399 | if (SecondTrueElement != Overdefined) { | |
400 | // None true -> false. | |
401 | if (FirstTrueElement == Undefined) | |
1a4d82fc | 402 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc LB |
403 | |
404 | Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); | |
405 | ||
406 | // True for one element -> 'i == 47'. | |
407 | if (SecondTrueElement == Undefined) | |
408 | return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); | |
409 | ||
410 | // True for two elements -> 'i == 47 | i == 72'. | |
411 | Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); | |
412 | Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); | |
413 | Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); | |
414 | return BinaryOperator::CreateOr(C1, C2); | |
415 | } | |
416 | ||
417 | // If the comparison is only false for one or two elements, emit direct | |
418 | // comparisons. | |
419 | if (SecondFalseElement != Overdefined) { | |
420 | // None false -> true. | |
421 | if (FirstFalseElement == Undefined) | |
1a4d82fc | 422 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
423 | |
424 | Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); | |
425 | ||
426 | // False for one element -> 'i != 47'. | |
427 | if (SecondFalseElement == Undefined) | |
428 | return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); | |
429 | ||
430 | // False for two elements -> 'i != 47 & i != 72'. | |
431 | Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); | |
432 | Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); | |
433 | Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); | |
434 | return BinaryOperator::CreateAnd(C1, C2); | |
435 | } | |
436 | ||
437 | // If the comparison can be replaced with a range comparison for the elements | |
438 | // where it is true, emit the range check. | |
439 | if (TrueRangeEnd != Overdefined) { | |
440 | assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); | |
441 | ||
442 | // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). | |
443 | if (FirstTrueElement) { | |
444 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); | |
445 | Idx = Builder->CreateAdd(Idx, Offs); | |
446 | } | |
447 | ||
448 | Value *End = ConstantInt::get(Idx->getType(), | |
449 | TrueRangeEnd-FirstTrueElement+1); | |
450 | return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); | |
451 | } | |
452 | ||
453 | // False range check. | |
454 | if (FalseRangeEnd != Overdefined) { | |
455 | assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); | |
456 | // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). | |
457 | if (FirstFalseElement) { | |
458 | Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); | |
459 | Idx = Builder->CreateAdd(Idx, Offs); | |
460 | } | |
461 | ||
462 | Value *End = ConstantInt::get(Idx->getType(), | |
463 | FalseRangeEnd-FirstFalseElement); | |
464 | return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); | |
465 | } | |
466 | ||
467 | ||
1a4d82fc | 468 | // If a magic bitvector captures the entire comparison state |
223e47cc LB |
469 | // of this load, replace it with computation that does: |
470 | // ((magic_cst >> i) & 1) != 0 | |
1a4d82fc JJ |
471 | { |
472 | Type *Ty = nullptr; | |
473 | ||
474 | // Look for an appropriate type: | |
475 | // - The type of Idx if the magic fits | |
476 | // - The smallest fitting legal type if we have a DataLayout | |
477 | // - Default to i32 | |
478 | if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) | |
479 | Ty = Idx->getType(); | |
480 | else if (DL) | |
481 | Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); | |
482 | else if (ArrayElementCount <= 32) | |
223e47cc | 483 | Ty = Type::getInt32Ty(Init->getContext()); |
1a4d82fc JJ |
484 | |
485 | if (Ty) { | |
486 | Value *V = Builder->CreateIntCast(Idx, Ty, false); | |
487 | V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); | |
488 | V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); | |
489 | return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); | |
490 | } | |
223e47cc LB |
491 | } |
492 | ||
1a4d82fc | 493 | return nullptr; |
223e47cc LB |
494 | } |
495 | ||
496 | ||
497 | /// EvaluateGEPOffsetExpression - Return a value that can be used to compare | |
498 | /// the *offset* implied by a GEP to zero. For example, if we have &A[i], we | |
499 | /// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can | |
500 | /// be complex, and scales are involved. The above expression would also be | |
501 | /// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). | |
502 | /// This later form is less amenable to optimization though, and we are allowed | |
503 | /// to generate the first by knowing that pointer arithmetic doesn't overflow. | |
504 | /// | |
505 | /// If we can't emit an optimized form for this expression, this returns null. | |
506 | /// | |
507 | static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { | |
1a4d82fc | 508 | const DataLayout &DL = *IC.getDataLayout(); |
223e47cc LB |
509 | gep_type_iterator GTI = gep_type_begin(GEP); |
510 | ||
511 | // Check to see if this gep only has a single variable index. If so, and if | |
512 | // any constant indices are a multiple of its scale, then we can compute this | |
513 | // in terms of the scale of the variable index. For example, if the GEP | |
514 | // implies an offset of "12 + i*4", then we can codegen this as "3 + i", | |
515 | // because the expression will cross zero at the same point. | |
516 | unsigned i, e = GEP->getNumOperands(); | |
517 | int64_t Offset = 0; | |
518 | for (i = 1; i != e; ++i, ++GTI) { | |
519 | if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { | |
520 | // Compute the aggregate offset of constant indices. | |
521 | if (CI->isZero()) continue; | |
522 | ||
523 | // Handle a struct index, which adds its field offset to the pointer. | |
524 | if (StructType *STy = dyn_cast<StructType>(*GTI)) { | |
1a4d82fc | 525 | Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); |
223e47cc | 526 | } else { |
1a4d82fc | 527 | uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); |
223e47cc LB |
528 | Offset += Size*CI->getSExtValue(); |
529 | } | |
530 | } else { | |
531 | // Found our variable index. | |
532 | break; | |
533 | } | |
534 | } | |
535 | ||
536 | // If there are no variable indices, we must have a constant offset, just | |
537 | // evaluate it the general way. | |
1a4d82fc | 538 | if (i == e) return nullptr; |
223e47cc LB |
539 | |
540 | Value *VariableIdx = GEP->getOperand(i); | |
541 | // Determine the scale factor of the variable element. For example, this is | |
542 | // 4 if the variable index is into an array of i32. | |
1a4d82fc | 543 | uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType()); |
223e47cc LB |
544 | |
545 | // Verify that there are no other variable indices. If so, emit the hard way. | |
546 | for (++i, ++GTI; i != e; ++i, ++GTI) { | |
547 | ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); | |
1a4d82fc | 548 | if (!CI) return nullptr; |
223e47cc LB |
549 | |
550 | // Compute the aggregate offset of constant indices. | |
551 | if (CI->isZero()) continue; | |
552 | ||
553 | // Handle a struct index, which adds its field offset to the pointer. | |
554 | if (StructType *STy = dyn_cast<StructType>(*GTI)) { | |
1a4d82fc | 555 | Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); |
223e47cc | 556 | } else { |
1a4d82fc | 557 | uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); |
223e47cc LB |
558 | Offset += Size*CI->getSExtValue(); |
559 | } | |
560 | } | |
561 | ||
1a4d82fc JJ |
562 | |
563 | ||
223e47cc LB |
564 | // Okay, we know we have a single variable index, which must be a |
565 | // pointer/array/vector index. If there is no offset, life is simple, return | |
566 | // the index. | |
1a4d82fc JJ |
567 | Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType()); |
568 | unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); | |
223e47cc LB |
569 | if (Offset == 0) { |
570 | // Cast to intptrty in case a truncation occurs. If an extension is needed, | |
571 | // we don't need to bother extending: the extension won't affect where the | |
572 | // computation crosses zero. | |
573 | if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { | |
223e47cc LB |
574 | VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); |
575 | } | |
576 | return VariableIdx; | |
577 | } | |
578 | ||
579 | // Otherwise, there is an index. The computation we will do will be modulo | |
580 | // the pointer size, so get it. | |
581 | uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); | |
582 | ||
583 | Offset &= PtrSizeMask; | |
584 | VariableScale &= PtrSizeMask; | |
585 | ||
586 | // To do this transformation, any constant index must be a multiple of the | |
587 | // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", | |
588 | // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a | |
589 | // multiple of the variable scale. | |
590 | int64_t NewOffs = Offset / (int64_t)VariableScale; | |
591 | if (Offset != NewOffs*(int64_t)VariableScale) | |
1a4d82fc | 592 | return nullptr; |
223e47cc LB |
593 | |
594 | // Okay, we can do this evaluation. Start by converting the index to intptr. | |
223e47cc LB |
595 | if (VariableIdx->getType() != IntPtrTy) |
596 | VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, | |
597 | true /*Signed*/); | |
598 | Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); | |
599 | return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); | |
600 | } | |
601 | ||
602 | /// FoldGEPICmp - Fold comparisons between a GEP instruction and something | |
603 | /// else. At this point we know that the GEP is on the LHS of the comparison. | |
604 | Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, | |
605 | ICmpInst::Predicate Cond, | |
606 | Instruction &I) { | |
607 | // Don't transform signed compares of GEPs into index compares. Even if the | |
608 | // GEP is inbounds, the final add of the base pointer can have signed overflow | |
609 | // and would change the result of the icmp. | |
610 | // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be | |
611 | // the maximum signed value for the pointer type. | |
612 | if (ICmpInst::isSigned(Cond)) | |
1a4d82fc | 613 | return nullptr; |
223e47cc | 614 | |
1a4d82fc JJ |
615 | // Look through bitcasts and addrspacecasts. We do not however want to remove |
616 | // 0 GEPs. | |
617 | if (!isa<GetElementPtrInst>(RHS)) | |
618 | RHS = RHS->stripPointerCasts(); | |
223e47cc LB |
619 | |
620 | Value *PtrBase = GEPLHS->getOperand(0); | |
1a4d82fc | 621 | if (DL && PtrBase == RHS && GEPLHS->isInBounds()) { |
223e47cc LB |
622 | // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). |
623 | // This transformation (ignoring the base and scales) is valid because we | |
624 | // know pointers can't overflow since the gep is inbounds. See if we can | |
625 | // output an optimized form. | |
626 | Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); | |
627 | ||
628 | // If not, synthesize the offset the hard way. | |
1a4d82fc | 629 | if (!Offset) |
223e47cc LB |
630 | Offset = EmitGEPOffset(GEPLHS); |
631 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, | |
632 | Constant::getNullValue(Offset->getType())); | |
633 | } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { | |
634 | // If the base pointers are different, but the indices are the same, just | |
635 | // compare the base pointer. | |
636 | if (PtrBase != GEPRHS->getOperand(0)) { | |
637 | bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); | |
638 | IndicesTheSame &= GEPLHS->getOperand(0)->getType() == | |
639 | GEPRHS->getOperand(0)->getType(); | |
640 | if (IndicesTheSame) | |
641 | for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) | |
642 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { | |
643 | IndicesTheSame = false; | |
644 | break; | |
645 | } | |
646 | ||
647 | // If all indices are the same, just compare the base pointers. | |
648 | if (IndicesTheSame) | |
1a4d82fc | 649 | return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0)); |
223e47cc LB |
650 | |
651 | // If we're comparing GEPs with two base pointers that only differ in type | |
652 | // and both GEPs have only constant indices or just one use, then fold | |
653 | // the compare with the adjusted indices. | |
1a4d82fc | 654 | if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() && |
223e47cc LB |
655 | (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && |
656 | (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && | |
657 | PtrBase->stripPointerCasts() == | |
658 | GEPRHS->getOperand(0)->stripPointerCasts()) { | |
1a4d82fc JJ |
659 | Value *LOffset = EmitGEPOffset(GEPLHS); |
660 | Value *ROffset = EmitGEPOffset(GEPRHS); | |
661 | ||
662 | // If we looked through an addrspacecast between different sized address | |
663 | // spaces, the LHS and RHS pointers are different sized | |
664 | // integers. Truncate to the smaller one. | |
665 | Type *LHSIndexTy = LOffset->getType(); | |
666 | Type *RHSIndexTy = ROffset->getType(); | |
667 | if (LHSIndexTy != RHSIndexTy) { | |
668 | if (LHSIndexTy->getPrimitiveSizeInBits() < | |
669 | RHSIndexTy->getPrimitiveSizeInBits()) { | |
670 | ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); | |
671 | } else | |
672 | LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); | |
673 | } | |
674 | ||
223e47cc | 675 | Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), |
1a4d82fc | 676 | LOffset, ROffset); |
223e47cc LB |
677 | return ReplaceInstUsesWith(I, Cmp); |
678 | } | |
679 | ||
680 | // Otherwise, the base pointers are different and the indices are | |
681 | // different, bail out. | |
1a4d82fc | 682 | return nullptr; |
223e47cc LB |
683 | } |
684 | ||
685 | // If one of the GEPs has all zero indices, recurse. | |
1a4d82fc | 686 | if (GEPLHS->hasAllZeroIndices()) |
223e47cc | 687 | return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), |
1a4d82fc | 688 | ICmpInst::getSwappedPredicate(Cond), I); |
223e47cc LB |
689 | |
690 | // If the other GEP has all zero indices, recurse. | |
1a4d82fc | 691 | if (GEPRHS->hasAllZeroIndices()) |
223e47cc LB |
692 | return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); |
693 | ||
694 | bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); | |
695 | if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { | |
696 | // If the GEPs only differ by one index, compare it. | |
697 | unsigned NumDifferences = 0; // Keep track of # differences. | |
698 | unsigned DiffOperand = 0; // The operand that differs. | |
699 | for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) | |
700 | if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { | |
701 | if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != | |
702 | GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { | |
703 | // Irreconcilable differences. | |
704 | NumDifferences = 2; | |
705 | break; | |
706 | } else { | |
707 | if (NumDifferences++) break; | |
708 | DiffOperand = i; | |
709 | } | |
710 | } | |
711 | ||
712 | if (NumDifferences == 0) // SAME GEP? | |
713 | return ReplaceInstUsesWith(I, // No comparison is needed here. | |
1a4d82fc | 714 | Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); |
223e47cc LB |
715 | |
716 | else if (NumDifferences == 1 && GEPsInBounds) { | |
717 | Value *LHSV = GEPLHS->getOperand(DiffOperand); | |
718 | Value *RHSV = GEPRHS->getOperand(DiffOperand); | |
719 | // Make sure we do a signed comparison here. | |
720 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); | |
721 | } | |
722 | } | |
723 | ||
724 | // Only lower this if the icmp is the only user of the GEP or if we expect | |
725 | // the result to fold to a constant! | |
1a4d82fc | 726 | if (DL && |
223e47cc LB |
727 | GEPsInBounds && |
728 | (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && | |
729 | (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { | |
730 | // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) | |
731 | Value *L = EmitGEPOffset(GEPLHS); | |
732 | Value *R = EmitGEPOffset(GEPRHS); | |
733 | return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); | |
734 | } | |
735 | } | |
1a4d82fc | 736 | return nullptr; |
223e47cc LB |
737 | } |
738 | ||
739 | /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". | |
1a4d82fc | 740 | Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, |
223e47cc | 741 | Value *X, ConstantInt *CI, |
1a4d82fc | 742 | ICmpInst::Predicate Pred) { |
223e47cc LB |
743 | // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, |
744 | // so the values can never be equal. Similarly for all other "or equals" | |
745 | // operators. | |
746 | ||
747 | // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 | |
748 | // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 | |
749 | // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 | |
750 | if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { | |
751 | Value *R = | |
752 | ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI); | |
753 | return new ICmpInst(ICmpInst::ICMP_UGT, X, R); | |
754 | } | |
755 | ||
756 | // (X+1) >u X --> X <u (0-1) --> X != 255 | |
757 | // (X+2) >u X --> X <u (0-2) --> X <u 254 | |
758 | // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 | |
759 | if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) | |
760 | return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); | |
761 | ||
762 | unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); | |
763 | ConstantInt *SMax = ConstantInt::get(X->getContext(), | |
764 | APInt::getSignedMaxValue(BitWidth)); | |
765 | ||
766 | // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 | |
767 | // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 | |
768 | // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 | |
769 | // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 | |
770 | // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 | |
771 | // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 | |
772 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) | |
773 | return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); | |
774 | ||
775 | // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 | |
776 | // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 | |
777 | // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 | |
778 | // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 | |
779 | // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 | |
780 | // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 | |
781 | ||
782 | assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); | |
1a4d82fc | 783 | Constant *C = Builder->getInt(CI->getValue()-1); |
223e47cc LB |
784 | return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); |
785 | } | |
786 | ||
787 | /// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS | |
788 | /// and CmpRHS are both known to be integer constants. | |
789 | Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, | |
790 | ConstantInt *DivRHS) { | |
791 | ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); | |
792 | const APInt &CmpRHSV = CmpRHS->getValue(); | |
793 | ||
794 | // FIXME: If the operand types don't match the type of the divide | |
795 | // then don't attempt this transform. The code below doesn't have the | |
796 | // logic to deal with a signed divide and an unsigned compare (and | |
797 | // vice versa). This is because (x /s C1) <s C2 produces different | |
798 | // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even | |
799 | // (x /u C1) <u C2. Simply casting the operands and result won't | |
800 | // work. :( The if statement below tests that condition and bails | |
801 | // if it finds it. | |
802 | bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; | |
803 | if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) | |
1a4d82fc | 804 | return nullptr; |
223e47cc | 805 | if (DivRHS->isZero()) |
1a4d82fc | 806 | return nullptr; // The ProdOV computation fails on divide by zero. |
223e47cc | 807 | if (DivIsSigned && DivRHS->isAllOnesValue()) |
1a4d82fc | 808 | return nullptr; // The overflow computation also screws up here |
223e47cc LB |
809 | if (DivRHS->isOne()) { |
810 | // This eliminates some funny cases with INT_MIN. | |
811 | ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. | |
812 | return &ICI; | |
813 | } | |
814 | ||
815 | // Compute Prod = CI * DivRHS. We are essentially solving an equation | |
816 | // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and | |
817 | // C2 (CI). By solving for X we can turn this into a range check | |
818 | // instead of computing a divide. | |
819 | Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); | |
820 | ||
821 | // Determine if the product overflows by seeing if the product is | |
822 | // not equal to the divide. Make sure we do the same kind of divide | |
823 | // as in the LHS instruction that we're folding. | |
824 | bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : | |
825 | ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; | |
826 | ||
827 | // Get the ICmp opcode | |
828 | ICmpInst::Predicate Pred = ICI.getPredicate(); | |
829 | ||
830 | /// If the division is known to be exact, then there is no remainder from the | |
831 | /// divide, so the covered range size is unit, otherwise it is the divisor. | |
832 | ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; | |
833 | ||
834 | // Figure out the interval that is being checked. For example, a comparison | |
835 | // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). | |
836 | // Compute this interval based on the constants involved and the signedness of | |
837 | // the compare/divide. This computes a half-open interval, keeping track of | |
838 | // whether either value in the interval overflows. After analysis each | |
839 | // overflow variable is set to 0 if it's corresponding bound variable is valid | |
840 | // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. | |
841 | int LoOverflow = 0, HiOverflow = 0; | |
1a4d82fc | 842 | Constant *LoBound = nullptr, *HiBound = nullptr; |
223e47cc LB |
843 | |
844 | if (!DivIsSigned) { // udiv | |
845 | // e.g. X/5 op 3 --> [15, 20) | |
846 | LoBound = Prod; | |
847 | HiOverflow = LoOverflow = ProdOV; | |
848 | if (!HiOverflow) { | |
849 | // If this is not an exact divide, then many values in the range collapse | |
850 | // to the same result value. | |
851 | HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); | |
852 | } | |
853 | ||
854 | } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. | |
855 | if (CmpRHSV == 0) { // (X / pos) op 0 | |
856 | // Can't overflow. e.g. X/2 op 0 --> [-1, 2) | |
857 | LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); | |
858 | HiBound = RangeSize; | |
859 | } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos | |
860 | LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) | |
861 | HiOverflow = LoOverflow = ProdOV; | |
862 | if (!HiOverflow) | |
863 | HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); | |
864 | } else { // (X / pos) op neg | |
865 | // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) | |
866 | HiBound = AddOne(Prod); | |
867 | LoOverflow = HiOverflow = ProdOV ? -1 : 0; | |
868 | if (!LoOverflow) { | |
869 | ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); | |
870 | LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; | |
871 | } | |
872 | } | |
873 | } else if (DivRHS->isNegative()) { // Divisor is < 0. | |
874 | if (DivI->isExact()) | |
875 | RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); | |
876 | if (CmpRHSV == 0) { // (X / neg) op 0 | |
877 | // e.g. X/-5 op 0 --> [-4, 5) | |
878 | LoBound = AddOne(RangeSize); | |
879 | HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); | |
880 | if (HiBound == DivRHS) { // -INTMIN = INTMIN | |
881 | HiOverflow = 1; // [INTMIN+1, overflow) | |
1a4d82fc | 882 | HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN |
223e47cc LB |
883 | } |
884 | } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos | |
885 | // e.g. X/-5 op 3 --> [-19, -14) | |
886 | HiBound = AddOne(Prod); | |
887 | HiOverflow = LoOverflow = ProdOV ? -1 : 0; | |
888 | if (!LoOverflow) | |
889 | LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; | |
890 | } else { // (X / neg) op neg | |
891 | LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) | |
892 | LoOverflow = HiOverflow = ProdOV; | |
893 | if (!HiOverflow) | |
894 | HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); | |
895 | } | |
896 | ||
897 | // Dividing by a negative swaps the condition. LT <-> GT | |
898 | Pred = ICmpInst::getSwappedPredicate(Pred); | |
899 | } | |
900 | ||
901 | Value *X = DivI->getOperand(0); | |
902 | switch (Pred) { | |
903 | default: llvm_unreachable("Unhandled icmp opcode!"); | |
904 | case ICmpInst::ICMP_EQ: | |
905 | if (LoOverflow && HiOverflow) | |
1a4d82fc | 906 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc LB |
907 | if (HiOverflow) |
908 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : | |
909 | ICmpInst::ICMP_UGE, X, LoBound); | |
910 | if (LoOverflow) | |
911 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : | |
912 | ICmpInst::ICMP_ULT, X, HiBound); | |
913 | return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, | |
914 | DivIsSigned, true)); | |
915 | case ICmpInst::ICMP_NE: | |
916 | if (LoOverflow && HiOverflow) | |
1a4d82fc | 917 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
918 | if (HiOverflow) |
919 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : | |
920 | ICmpInst::ICMP_ULT, X, LoBound); | |
921 | if (LoOverflow) | |
922 | return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : | |
923 | ICmpInst::ICMP_UGE, X, HiBound); | |
924 | return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, | |
925 | DivIsSigned, false)); | |
926 | case ICmpInst::ICMP_ULT: | |
927 | case ICmpInst::ICMP_SLT: | |
928 | if (LoOverflow == +1) // Low bound is greater than input range. | |
1a4d82fc | 929 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc | 930 | if (LoOverflow == -1) // Low bound is less than input range. |
1a4d82fc | 931 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc LB |
932 | return new ICmpInst(Pred, X, LoBound); |
933 | case ICmpInst::ICMP_UGT: | |
934 | case ICmpInst::ICMP_SGT: | |
935 | if (HiOverflow == +1) // High bound greater than input range. | |
1a4d82fc | 936 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc | 937 | if (HiOverflow == -1) // High bound less than input range. |
1a4d82fc | 938 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
939 | if (Pred == ICmpInst::ICMP_UGT) |
940 | return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); | |
941 | return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); | |
942 | } | |
943 | } | |
944 | ||
945 | /// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". | |
946 | Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, | |
947 | ConstantInt *ShAmt) { | |
948 | const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue(); | |
949 | ||
950 | // Check that the shift amount is in range. If not, don't perform | |
951 | // undefined shifts. When the shift is visited it will be | |
952 | // simplified. | |
953 | uint32_t TypeBits = CmpRHSV.getBitWidth(); | |
954 | uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); | |
955 | if (ShAmtVal >= TypeBits || ShAmtVal == 0) | |
1a4d82fc | 956 | return nullptr; |
223e47cc LB |
957 | |
958 | if (!ICI.isEquality()) { | |
959 | // If we have an unsigned comparison and an ashr, we can't simplify this. | |
960 | // Similarly for signed comparisons with lshr. | |
961 | if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) | |
1a4d82fc | 962 | return nullptr; |
223e47cc LB |
963 | |
964 | // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv | |
965 | // by a power of 2. Since we already have logic to simplify these, | |
966 | // transform to div and then simplify the resultant comparison. | |
967 | if (Shr->getOpcode() == Instruction::AShr && | |
968 | (!Shr->isExact() || ShAmtVal == TypeBits - 1)) | |
1a4d82fc | 969 | return nullptr; |
223e47cc LB |
970 | |
971 | // Revisit the shift (to delete it). | |
972 | Worklist.Add(Shr); | |
973 | ||
974 | Constant *DivCst = | |
975 | ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); | |
976 | ||
977 | Value *Tmp = | |
978 | Shr->getOpcode() == Instruction::AShr ? | |
979 | Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : | |
980 | Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); | |
981 | ||
982 | ICI.setOperand(0, Tmp); | |
983 | ||
984 | // If the builder folded the binop, just return it. | |
985 | BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); | |
1a4d82fc | 986 | if (!TheDiv) |
223e47cc LB |
987 | return &ICI; |
988 | ||
989 | // Otherwise, fold this div/compare. | |
990 | assert(TheDiv->getOpcode() == Instruction::SDiv || | |
991 | TheDiv->getOpcode() == Instruction::UDiv); | |
992 | ||
993 | Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst)); | |
994 | assert(Res && "This div/cst should have folded!"); | |
995 | return Res; | |
996 | } | |
997 | ||
998 | ||
999 | // If we are comparing against bits always shifted out, the | |
1000 | // comparison cannot succeed. | |
1001 | APInt Comp = CmpRHSV << ShAmtVal; | |
1a4d82fc | 1002 | ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp); |
223e47cc LB |
1003 | if (Shr->getOpcode() == Instruction::LShr) |
1004 | Comp = Comp.lshr(ShAmtVal); | |
1005 | else | |
1006 | Comp = Comp.ashr(ShAmtVal); | |
1007 | ||
1008 | if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. | |
1009 | bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; | |
1a4d82fc | 1010 | Constant *Cst = Builder->getInt1(IsICMP_NE); |
223e47cc LB |
1011 | return ReplaceInstUsesWith(ICI, Cst); |
1012 | } | |
1013 | ||
1014 | // Otherwise, check to see if the bits shifted out are known to be zero. | |
1015 | // If so, we can compare against the unshifted value: | |
1016 | // (X & 4) >> 1 == 2 --> (X & 4) == 4. | |
1017 | if (Shr->hasOneUse() && Shr->isExact()) | |
1018 | return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); | |
1019 | ||
1020 | if (Shr->hasOneUse()) { | |
1021 | // Otherwise strength reduce the shift into an and. | |
1022 | APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); | |
1a4d82fc | 1023 | Constant *Mask = Builder->getInt(Val); |
223e47cc LB |
1024 | |
1025 | Value *And = Builder->CreateAnd(Shr->getOperand(0), | |
1026 | Mask, Shr->getName()+".mask"); | |
1027 | return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); | |
1028 | } | |
1a4d82fc | 1029 | return nullptr; |
223e47cc LB |
1030 | } |
1031 | ||
1a4d82fc JJ |
1032 | /// FoldICmpCstShrCst - Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" -> |
1033 | /// (icmp eq/ne A, Log2(const2/const1)) -> | |
1034 | /// (icmp eq/ne A, Log2(const2) - Log2(const1)). | |
1035 | Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A, | |
1036 | ConstantInt *CI1, | |
1037 | ConstantInt *CI2) { | |
1038 | assert(I.isEquality() && "Cannot fold icmp gt/lt"); | |
1039 | ||
1040 | auto getConstant = [&I, this](bool IsTrue) { | |
1041 | if (I.getPredicate() == I.ICMP_NE) | |
1042 | IsTrue = !IsTrue; | |
1043 | return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue)); | |
1044 | }; | |
1045 | ||
1046 | auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) { | |
1047 | if (I.getPredicate() == I.ICMP_NE) | |
1048 | Pred = CmpInst::getInversePredicate(Pred); | |
1049 | return new ICmpInst(Pred, LHS, RHS); | |
1050 | }; | |
1051 | ||
1052 | APInt AP1 = CI1->getValue(); | |
1053 | APInt AP2 = CI2->getValue(); | |
1054 | ||
1055 | if (!AP1) { | |
1056 | if (!AP2) { | |
1057 | // Both Constants are 0. | |
1058 | return getConstant(true); | |
1059 | } | |
1060 | ||
1061 | if (cast<BinaryOperator>(Op)->isExact()) | |
1062 | return getConstant(false); | |
1063 | ||
1064 | if (AP2.isNegative()) { | |
1065 | // MSB is set, so a lshr with a large enough 'A' would be undefined. | |
1066 | return getConstant(false); | |
1067 | } | |
1068 | ||
1069 | // 'A' must be large enough to shift out the highest set bit. | |
1070 | return getICmp(I.ICMP_UGT, A, | |
1071 | ConstantInt::get(A->getType(), AP2.logBase2())); | |
1072 | } | |
1073 | ||
1074 | if (!AP2) { | |
1075 | // Shifting 0 by any value gives 0. | |
1076 | return getConstant(false); | |
1077 | } | |
1078 | ||
1079 | bool IsAShr = isa<AShrOperator>(Op); | |
1080 | if (AP1 == AP2) { | |
1081 | if (AP1.isAllOnesValue() && IsAShr) { | |
1082 | // Arithmatic shift of -1 is always -1. | |
1083 | return getConstant(true); | |
1084 | } | |
1085 | return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); | |
1086 | } | |
1087 | ||
1088 | bool IsNegative = false; | |
1089 | if (IsAShr) { | |
1090 | if (AP1.isNegative() != AP2.isNegative()) { | |
1091 | // Arithmetic shift will never change the sign. | |
1092 | return getConstant(false); | |
1093 | } | |
1094 | // Both the constants are negative, take their positive to calculate log. | |
1095 | if (AP1.isNegative()) { | |
1096 | if (AP1.slt(AP2)) | |
1097 | // Right-shifting won't increase the magnitude. | |
1098 | return getConstant(false); | |
1099 | IsNegative = true; | |
1100 | } | |
1101 | } | |
1102 | ||
1103 | if (!IsNegative && AP1.ugt(AP2)) | |
1104 | // Right-shifting will not increase the value. | |
1105 | return getConstant(false); | |
1106 | ||
1107 | // Get the distance between the highest bit that's set. | |
1108 | int Shift; | |
1109 | if (IsNegative) | |
1110 | Shift = (-AP2).logBase2() - (-AP1).logBase2(); | |
1111 | else | |
1112 | Shift = AP2.logBase2() - AP1.logBase2(); | |
1113 | ||
1114 | if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift)) | |
1115 | return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); | |
1116 | ||
1117 | // Shifting const2 will never be equal to const1. | |
1118 | return getConstant(false); | |
1119 | } | |
223e47cc LB |
1120 | |
1121 | /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". | |
1122 | /// | |
1123 | Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, | |
1124 | Instruction *LHSI, | |
1125 | ConstantInt *RHS) { | |
1126 | const APInt &RHSV = RHS->getValue(); | |
1127 | ||
1128 | switch (LHSI->getOpcode()) { | |
1129 | case Instruction::Trunc: | |
1130 | if (ICI.isEquality() && LHSI->hasOneUse()) { | |
1131 | // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all | |
1132 | // of the high bits truncated out of x are known. | |
1133 | unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), | |
1134 | SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); | |
1135 | APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); | |
1a4d82fc | 1136 | computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI); |
223e47cc LB |
1137 | |
1138 | // If all the high bits are known, we can do this xform. | |
1139 | if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { | |
1140 | // Pull in the high bits from known-ones set. | |
1141 | APInt NewRHS = RHS->getValue().zext(SrcBits); | |
1142 | NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); | |
1143 | return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), | |
1a4d82fc | 1144 | Builder->getInt(NewRHS)); |
223e47cc LB |
1145 | } |
1146 | } | |
1147 | break; | |
1148 | ||
1a4d82fc JJ |
1149 | case Instruction::Xor: // (icmp pred (xor X, XorCst), CI) |
1150 | if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { | |
223e47cc LB |
1151 | // If this is a comparison that tests the signbit (X < 0) or (x > -1), |
1152 | // fold the xor. | |
1153 | if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || | |
1154 | (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { | |
1155 | Value *CompareVal = LHSI->getOperand(0); | |
1156 | ||
1a4d82fc | 1157 | // If the sign bit of the XorCst is not set, there is no change to |
223e47cc | 1158 | // the operation, just stop using the Xor. |
1a4d82fc | 1159 | if (!XorCst->isNegative()) { |
223e47cc LB |
1160 | ICI.setOperand(0, CompareVal); |
1161 | Worklist.Add(LHSI); | |
1162 | return &ICI; | |
1163 | } | |
1164 | ||
1165 | // Was the old condition true if the operand is positive? | |
1166 | bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; | |
1167 | ||
1168 | // If so, the new one isn't. | |
1169 | isTrueIfPositive ^= true; | |
1170 | ||
1171 | if (isTrueIfPositive) | |
1172 | return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, | |
1173 | SubOne(RHS)); | |
1174 | else | |
1175 | return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, | |
1176 | AddOne(RHS)); | |
1177 | } | |
1178 | ||
1179 | if (LHSI->hasOneUse()) { | |
1180 | // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) | |
1a4d82fc JJ |
1181 | if (!ICI.isEquality() && XorCst->getValue().isSignBit()) { |
1182 | const APInt &SignBit = XorCst->getValue(); | |
223e47cc LB |
1183 | ICmpInst::Predicate Pred = ICI.isSigned() |
1184 | ? ICI.getUnsignedPredicate() | |
1185 | : ICI.getSignedPredicate(); | |
1186 | return new ICmpInst(Pred, LHSI->getOperand(0), | |
1a4d82fc | 1187 | Builder->getInt(RHSV ^ SignBit)); |
223e47cc LB |
1188 | } |
1189 | ||
1190 | // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) | |
1a4d82fc JJ |
1191 | if (!ICI.isEquality() && XorCst->isMaxValue(true)) { |
1192 | const APInt &NotSignBit = XorCst->getValue(); | |
223e47cc LB |
1193 | ICmpInst::Predicate Pred = ICI.isSigned() |
1194 | ? ICI.getUnsignedPredicate() | |
1195 | : ICI.getSignedPredicate(); | |
1196 | Pred = ICI.getSwappedPredicate(Pred); | |
1197 | return new ICmpInst(Pred, LHSI->getOperand(0), | |
1a4d82fc | 1198 | Builder->getInt(RHSV ^ NotSignBit)); |
223e47cc LB |
1199 | } |
1200 | } | |
1a4d82fc JJ |
1201 | |
1202 | // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C) | |
1203 | // iff -C is a power of 2 | |
1204 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT && | |
1205 | XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2()) | |
1206 | return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst); | |
1207 | ||
1208 | // (icmp ult (xor X, C), -C) -> (icmp uge X, C) | |
1209 | // iff -C is a power of 2 | |
1210 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT && | |
1211 | XorCst->getValue() == -RHSV && RHSV.isPowerOf2()) | |
1212 | return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst); | |
223e47cc LB |
1213 | } |
1214 | break; | |
1a4d82fc | 1215 | case Instruction::And: // (icmp pred (and X, AndCst), RHS) |
223e47cc LB |
1216 | if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && |
1217 | LHSI->getOperand(0)->hasOneUse()) { | |
1a4d82fc | 1218 | ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1)); |
223e47cc LB |
1219 | |
1220 | // If the LHS is an AND of a truncating cast, we can widen the | |
1221 | // and/compare to be the input width without changing the value | |
1222 | // produced, eliminating a cast. | |
1223 | if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { | |
1224 | // We can do this transformation if either the AND constant does not | |
1225 | // have its sign bit set or if it is an equality comparison. | |
1226 | // Extending a relational comparison when we're checking the sign | |
1227 | // bit would not work. | |
1228 | if (ICI.isEquality() || | |
1a4d82fc | 1229 | (!AndCst->isNegative() && RHSV.isNonNegative())) { |
223e47cc LB |
1230 | Value *NewAnd = |
1231 | Builder->CreateAnd(Cast->getOperand(0), | |
1a4d82fc | 1232 | ConstantExpr::getZExt(AndCst, Cast->getSrcTy())); |
223e47cc LB |
1233 | NewAnd->takeName(LHSI); |
1234 | return new ICmpInst(ICI.getPredicate(), NewAnd, | |
1235 | ConstantExpr::getZExt(RHS, Cast->getSrcTy())); | |
1236 | } | |
1237 | } | |
1238 | ||
1239 | // If the LHS is an AND of a zext, and we have an equality compare, we can | |
1240 | // shrink the and/compare to the smaller type, eliminating the cast. | |
1241 | if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { | |
1242 | IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); | |
1243 | // Make sure we don't compare the upper bits, SimplifyDemandedBits | |
1244 | // should fold the icmp to true/false in that case. | |
1245 | if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { | |
1246 | Value *NewAnd = | |
1247 | Builder->CreateAnd(Cast->getOperand(0), | |
1a4d82fc | 1248 | ConstantExpr::getTrunc(AndCst, Ty)); |
223e47cc LB |
1249 | NewAnd->takeName(LHSI); |
1250 | return new ICmpInst(ICI.getPredicate(), NewAnd, | |
1251 | ConstantExpr::getTrunc(RHS, Ty)); | |
1252 | } | |
1253 | } | |
1254 | ||
1255 | // If this is: (X >> C1) & C2 != C3 (where any shift and any compare | |
1256 | // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This | |
1257 | // happens a LOT in code produced by the C front-end, for bitfield | |
1258 | // access. | |
1259 | BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); | |
1260 | if (Shift && !Shift->isShift()) | |
1a4d82fc | 1261 | Shift = nullptr; |
223e47cc LB |
1262 | |
1263 | ConstantInt *ShAmt; | |
1a4d82fc | 1264 | ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr; |
223e47cc | 1265 | |
1a4d82fc JJ |
1266 | // This seemingly simple opportunity to fold away a shift turns out to |
1267 | // be rather complicated. See PR17827 | |
1268 | // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details. | |
223e47cc | 1269 | if (ShAmt) { |
1a4d82fc JJ |
1270 | bool CanFold = false; |
1271 | unsigned ShiftOpcode = Shift->getOpcode(); | |
1272 | if (ShiftOpcode == Instruction::AShr) { | |
1273 | // There may be some constraints that make this possible, | |
1274 | // but nothing simple has been discovered yet. | |
1275 | CanFold = false; | |
1276 | } else if (ShiftOpcode == Instruction::Shl) { | |
1277 | // For a left shift, we can fold if the comparison is not signed. | |
1278 | // We can also fold a signed comparison if the mask value and | |
1279 | // comparison value are not negative. These constraints may not be | |
1280 | // obvious, but we can prove that they are correct using an SMT | |
1281 | // solver. | |
1282 | if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative())) | |
223e47cc | 1283 | CanFold = true; |
1a4d82fc JJ |
1284 | } else if (ShiftOpcode == Instruction::LShr) { |
1285 | // For a logical right shift, we can fold if the comparison is not | |
1286 | // signed. We can also fold a signed comparison if the shifted mask | |
1287 | // value and the shifted comparison value are not negative. | |
1288 | // These constraints may not be obvious, but we can prove that they | |
1289 | // are correct using an SMT solver. | |
1290 | if (!ICI.isSigned()) | |
1291 | CanFold = true; | |
1292 | else { | |
1293 | ConstantInt *ShiftedAndCst = | |
1294 | cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt)); | |
1295 | ConstantInt *ShiftedRHSCst = | |
1296 | cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt)); | |
1297 | ||
1298 | if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative()) | |
1299 | CanFold = true; | |
1300 | } | |
223e47cc LB |
1301 | } |
1302 | ||
1303 | if (CanFold) { | |
1304 | Constant *NewCst; | |
1a4d82fc | 1305 | if (ShiftOpcode == Instruction::Shl) |
223e47cc LB |
1306 | NewCst = ConstantExpr::getLShr(RHS, ShAmt); |
1307 | else | |
1308 | NewCst = ConstantExpr::getShl(RHS, ShAmt); | |
1309 | ||
1310 | // Check to see if we are shifting out any of the bits being | |
1311 | // compared. | |
1a4d82fc | 1312 | if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) { |
223e47cc LB |
1313 | // If we shifted bits out, the fold is not going to work out. |
1314 | // As a special case, check to see if this means that the | |
1315 | // result is always true or false now. | |
1316 | if (ICI.getPredicate() == ICmpInst::ICMP_EQ) | |
1a4d82fc | 1317 | return ReplaceInstUsesWith(ICI, Builder->getFalse()); |
223e47cc | 1318 | if (ICI.getPredicate() == ICmpInst::ICMP_NE) |
1a4d82fc | 1319 | return ReplaceInstUsesWith(ICI, Builder->getTrue()); |
223e47cc LB |
1320 | } else { |
1321 | ICI.setOperand(1, NewCst); | |
1a4d82fc JJ |
1322 | Constant *NewAndCst; |
1323 | if (ShiftOpcode == Instruction::Shl) | |
1324 | NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt); | |
223e47cc | 1325 | else |
1a4d82fc JJ |
1326 | NewAndCst = ConstantExpr::getShl(AndCst, ShAmt); |
1327 | LHSI->setOperand(1, NewAndCst); | |
223e47cc LB |
1328 | LHSI->setOperand(0, Shift->getOperand(0)); |
1329 | Worklist.Add(Shift); // Shift is dead. | |
1330 | return &ICI; | |
1331 | } | |
1332 | } | |
1333 | } | |
1334 | ||
1335 | // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is | |
1336 | // preferable because it allows the C<<Y expression to be hoisted out | |
1337 | // of a loop if Y is invariant and X is not. | |
1338 | if (Shift && Shift->hasOneUse() && RHSV == 0 && | |
1339 | ICI.isEquality() && !Shift->isArithmeticShift() && | |
1340 | !isa<Constant>(Shift->getOperand(0))) { | |
1341 | // Compute C << Y. | |
1342 | Value *NS; | |
1343 | if (Shift->getOpcode() == Instruction::LShr) { | |
1a4d82fc | 1344 | NS = Builder->CreateShl(AndCst, Shift->getOperand(1)); |
223e47cc LB |
1345 | } else { |
1346 | // Insert a logical shift. | |
1a4d82fc | 1347 | NS = Builder->CreateLShr(AndCst, Shift->getOperand(1)); |
223e47cc LB |
1348 | } |
1349 | ||
1350 | // Compute X & (C << Y). | |
1351 | Value *NewAnd = | |
1352 | Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); | |
1353 | ||
1354 | ICI.setOperand(0, NewAnd); | |
1355 | return &ICI; | |
1356 | } | |
970d7e83 | 1357 | |
1a4d82fc JJ |
1358 | // (icmp pred (and (or (lshr X, Y), X), 1), 0) --> |
1359 | // (icmp pred (and X, (or (shl 1, Y), 1), 0)) | |
1360 | // | |
1361 | // iff pred isn't signed | |
1362 | { | |
1363 | Value *X, *Y, *LShr; | |
1364 | if (!ICI.isSigned() && RHSV == 0) { | |
1365 | if (match(LHSI->getOperand(1), m_One())) { | |
1366 | Constant *One = cast<Constant>(LHSI->getOperand(1)); | |
1367 | Value *Or = LHSI->getOperand(0); | |
1368 | if (match(Or, m_Or(m_Value(LShr), m_Value(X))) && | |
1369 | match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) { | |
1370 | unsigned UsesRemoved = 0; | |
1371 | if (LHSI->hasOneUse()) | |
1372 | ++UsesRemoved; | |
1373 | if (Or->hasOneUse()) | |
1374 | ++UsesRemoved; | |
1375 | if (LShr->hasOneUse()) | |
1376 | ++UsesRemoved; | |
1377 | Value *NewOr = nullptr; | |
1378 | // Compute X & ((1 << Y) | 1) | |
1379 | if (auto *C = dyn_cast<Constant>(Y)) { | |
1380 | if (UsesRemoved >= 1) | |
1381 | NewOr = | |
1382 | ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); | |
1383 | } else { | |
1384 | if (UsesRemoved >= 3) | |
1385 | NewOr = Builder->CreateOr(Builder->CreateShl(One, Y, | |
1386 | LShr->getName(), | |
1387 | /*HasNUW=*/true), | |
1388 | One, Or->getName()); | |
1389 | } | |
1390 | if (NewOr) { | |
1391 | Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName()); | |
1392 | ICI.setOperand(0, NewAnd); | |
1393 | return &ICI; | |
1394 | } | |
1395 | } | |
1396 | } | |
1397 | } | |
1398 | } | |
1399 | ||
1400 | // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any | |
1401 | // bit set in (X & AndCst) will produce a result greater than RHSV. | |
970d7e83 | 1402 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { |
1a4d82fc JJ |
1403 | unsigned NTZ = AndCst->getValue().countTrailingZeros(); |
1404 | if ((NTZ < AndCst->getBitWidth()) && | |
1405 | APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV)) | |
970d7e83 LB |
1406 | return new ICmpInst(ICmpInst::ICMP_NE, LHSI, |
1407 | Constant::getNullValue(RHS->getType())); | |
1408 | } | |
223e47cc LB |
1409 | } |
1410 | ||
1411 | // Try to optimize things like "A[i]&42 == 0" to index computations. | |
1412 | if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { | |
1413 | if (GetElementPtrInst *GEP = | |
1414 | dyn_cast<GetElementPtrInst>(LI->getOperand(0))) | |
1415 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | |
1416 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | |
1417 | !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) { | |
1418 | ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1)); | |
1419 | if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C)) | |
1420 | return Res; | |
1421 | } | |
1422 | } | |
1a4d82fc JJ |
1423 | |
1424 | // X & -C == -C -> X > u ~C | |
1425 | // X & -C != -C -> X <= u ~C | |
1426 | // iff C is a power of 2 | |
1427 | if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2()) | |
1428 | return new ICmpInst( | |
1429 | ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT | |
1430 | : ICmpInst::ICMP_ULE, | |
1431 | LHSI->getOperand(0), SubOne(RHS)); | |
223e47cc LB |
1432 | break; |
1433 | ||
1434 | case Instruction::Or: { | |
1435 | if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) | |
1436 | break; | |
1437 | Value *P, *Q; | |
1438 | if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { | |
1439 | // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 | |
1440 | // -> and (icmp eq P, null), (icmp eq Q, null). | |
1441 | Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, | |
1442 | Constant::getNullValue(P->getType())); | |
1443 | Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, | |
1444 | Constant::getNullValue(Q->getType())); | |
1445 | Instruction *Op; | |
1446 | if (ICI.getPredicate() == ICmpInst::ICMP_EQ) | |
1447 | Op = BinaryOperator::CreateAnd(ICIP, ICIQ); | |
1448 | else | |
1449 | Op = BinaryOperator::CreateOr(ICIP, ICIQ); | |
1450 | return Op; | |
1451 | } | |
1452 | break; | |
1453 | } | |
1454 | ||
1a4d82fc JJ |
1455 | case Instruction::Mul: { // (icmp pred (mul X, Val), CI) |
1456 | ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); | |
1457 | if (!Val) break; | |
1458 | ||
1459 | // If this is a signed comparison to 0 and the mul is sign preserving, | |
1460 | // use the mul LHS operand instead. | |
1461 | ICmpInst::Predicate pred = ICI.getPredicate(); | |
1462 | if (isSignTest(pred, RHS) && !Val->isZero() && | |
1463 | cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) | |
1464 | return new ICmpInst(Val->isNegative() ? | |
1465 | ICmpInst::getSwappedPredicate(pred) : pred, | |
1466 | LHSI->getOperand(0), | |
1467 | Constant::getNullValue(RHS->getType())); | |
1468 | ||
1469 | break; | |
1470 | } | |
1471 | ||
223e47cc | 1472 | case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) |
1a4d82fc | 1473 | uint32_t TypeBits = RHSV.getBitWidth(); |
223e47cc | 1474 | ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); |
1a4d82fc JJ |
1475 | if (!ShAmt) { |
1476 | Value *X; | |
1477 | // (1 << X) pred P2 -> X pred Log2(P2) | |
1478 | if (match(LHSI, m_Shl(m_One(), m_Value(X)))) { | |
1479 | bool RHSVIsPowerOf2 = RHSV.isPowerOf2(); | |
1480 | ICmpInst::Predicate Pred = ICI.getPredicate(); | |
1481 | if (ICI.isUnsigned()) { | |
1482 | if (!RHSVIsPowerOf2) { | |
1483 | // (1 << X) < 30 -> X <= 4 | |
1484 | // (1 << X) <= 30 -> X <= 4 | |
1485 | // (1 << X) >= 30 -> X > 4 | |
1486 | // (1 << X) > 30 -> X > 4 | |
1487 | if (Pred == ICmpInst::ICMP_ULT) | |
1488 | Pred = ICmpInst::ICMP_ULE; | |
1489 | else if (Pred == ICmpInst::ICMP_UGE) | |
1490 | Pred = ICmpInst::ICMP_UGT; | |
1491 | } | |
1492 | unsigned RHSLog2 = RHSV.logBase2(); | |
1493 | ||
1494 | // (1 << X) >= 2147483648 -> X >= 31 -> X == 31 | |
1495 | // (1 << X) < 2147483648 -> X < 31 -> X != 31 | |
1496 | if (RHSLog2 == TypeBits-1) { | |
1497 | if (Pred == ICmpInst::ICMP_UGE) | |
1498 | Pred = ICmpInst::ICMP_EQ; | |
1499 | else if (Pred == ICmpInst::ICMP_ULT) | |
1500 | Pred = ICmpInst::ICMP_NE; | |
1501 | } | |
223e47cc | 1502 | |
1a4d82fc JJ |
1503 | return new ICmpInst(Pred, X, |
1504 | ConstantInt::get(RHS->getType(), RHSLog2)); | |
1505 | } else if (ICI.isSigned()) { | |
1506 | if (RHSV.isAllOnesValue()) { | |
1507 | // (1 << X) <= -1 -> X == 31 | |
1508 | if (Pred == ICmpInst::ICMP_SLE) | |
1509 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
1510 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1511 | ||
1512 | // (1 << X) > -1 -> X != 31 | |
1513 | if (Pred == ICmpInst::ICMP_SGT) | |
1514 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
1515 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1516 | } else if (!RHSV) { | |
1517 | // (1 << X) < 0 -> X == 31 | |
1518 | // (1 << X) <= 0 -> X == 31 | |
1519 | if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) | |
1520 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
1521 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1522 | ||
1523 | // (1 << X) >= 0 -> X != 31 | |
1524 | // (1 << X) > 0 -> X != 31 | |
1525 | if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) | |
1526 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
1527 | ConstantInt::get(RHS->getType(), TypeBits-1)); | |
1528 | } | |
1529 | } else if (ICI.isEquality()) { | |
1530 | if (RHSVIsPowerOf2) | |
1531 | return new ICmpInst( | |
1532 | Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2())); | |
1533 | } | |
1534 | } | |
1535 | break; | |
1536 | } | |
223e47cc LB |
1537 | |
1538 | // Check that the shift amount is in range. If not, don't perform | |
1539 | // undefined shifts. When the shift is visited it will be | |
1540 | // simplified. | |
1541 | if (ShAmt->uge(TypeBits)) | |
1542 | break; | |
1543 | ||
1544 | if (ICI.isEquality()) { | |
1545 | // If we are comparing against bits always shifted out, the | |
1546 | // comparison cannot succeed. | |
1547 | Constant *Comp = | |
1548 | ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), | |
1549 | ShAmt); | |
1550 | if (Comp != RHS) {// Comparing against a bit that we know is zero. | |
1551 | bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; | |
1a4d82fc | 1552 | Constant *Cst = Builder->getInt1(IsICMP_NE); |
223e47cc LB |
1553 | return ReplaceInstUsesWith(ICI, Cst); |
1554 | } | |
1555 | ||
1556 | // If the shift is NUW, then it is just shifting out zeros, no need for an | |
1557 | // AND. | |
1558 | if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap()) | |
1559 | return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), | |
1560 | ConstantExpr::getLShr(RHS, ShAmt)); | |
1561 | ||
1a4d82fc JJ |
1562 | // If the shift is NSW and we compare to 0, then it is just shifting out |
1563 | // sign bits, no need for an AND either. | |
1564 | if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) | |
1565 | return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), | |
1566 | ConstantExpr::getLShr(RHS, ShAmt)); | |
1567 | ||
223e47cc LB |
1568 | if (LHSI->hasOneUse()) { |
1569 | // Otherwise strength reduce the shift into an and. | |
1570 | uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); | |
1a4d82fc JJ |
1571 | Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits, |
1572 | TypeBits - ShAmtVal)); | |
223e47cc LB |
1573 | |
1574 | Value *And = | |
1575 | Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); | |
1576 | return new ICmpInst(ICI.getPredicate(), And, | |
1577 | ConstantExpr::getLShr(RHS, ShAmt)); | |
1578 | } | |
1579 | } | |
1580 | ||
1a4d82fc JJ |
1581 | // If this is a signed comparison to 0 and the shift is sign preserving, |
1582 | // use the shift LHS operand instead. | |
1583 | ICmpInst::Predicate pred = ICI.getPredicate(); | |
1584 | if (isSignTest(pred, RHS) && | |
1585 | cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) | |
1586 | return new ICmpInst(pred, | |
1587 | LHSI->getOperand(0), | |
1588 | Constant::getNullValue(RHS->getType())); | |
1589 | ||
223e47cc LB |
1590 | // Otherwise, if this is a comparison of the sign bit, simplify to and/test. |
1591 | bool TrueIfSigned = false; | |
1592 | if (LHSI->hasOneUse() && | |
1593 | isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { | |
1594 | // (X << 31) <s 0 --> (X&1) != 0 | |
1595 | Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(), | |
1596 | APInt::getOneBitSet(TypeBits, | |
1597 | TypeBits-ShAmt->getZExtValue()-1)); | |
1598 | Value *And = | |
1599 | Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); | |
1600 | return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, | |
1601 | And, Constant::getNullValue(And->getType())); | |
1602 | } | |
970d7e83 LB |
1603 | |
1604 | // Transform (icmp pred iM (shl iM %v, N), CI) | |
1605 | // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) | |
1606 | // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. | |
1607 | // This enables to get rid of the shift in favor of a trunc which can be | |
1608 | // free on the target. It has the additional benefit of comparing to a | |
1609 | // smaller constant, which will be target friendly. | |
1610 | unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); | |
1611 | if (LHSI->hasOneUse() && | |
1612 | Amt != 0 && RHSV.countTrailingZeros() >= Amt) { | |
1613 | Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); | |
1614 | Constant *NCI = ConstantExpr::getTrunc( | |
1615 | ConstantExpr::getAShr(RHS, | |
1616 | ConstantInt::get(RHS->getType(), Amt)), | |
1617 | NTy); | |
1618 | return new ICmpInst(ICI.getPredicate(), | |
1619 | Builder->CreateTrunc(LHSI->getOperand(0), NTy), | |
1620 | NCI); | |
1621 | } | |
1622 | ||
223e47cc LB |
1623 | break; |
1624 | } | |
1625 | ||
1626 | case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) | |
1627 | case Instruction::AShr: { | |
1628 | // Handle equality comparisons of shift-by-constant. | |
1629 | BinaryOperator *BO = cast<BinaryOperator>(LHSI); | |
1630 | if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { | |
1631 | if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) | |
1632 | return Res; | |
1633 | } | |
1634 | ||
1635 | // Handle exact shr's. | |
1636 | if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { | |
1637 | if (RHSV.isMinValue()) | |
1638 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); | |
1639 | } | |
1640 | break; | |
1641 | } | |
1642 | ||
1643 | case Instruction::SDiv: | |
1644 | case Instruction::UDiv: | |
1645 | // Fold: icmp pred ([us]div X, C1), C2 -> range test | |
1646 | // Fold this div into the comparison, producing a range check. | |
1647 | // Determine, based on the divide type, what the range is being | |
1648 | // checked. If there is an overflow on the low or high side, remember | |
1649 | // it, otherwise compute the range [low, hi) bounding the new value. | |
1650 | // See: InsertRangeTest above for the kinds of replacements possible. | |
1651 | if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) | |
1652 | if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI), | |
1653 | DivRHS)) | |
1654 | return R; | |
1655 | break; | |
1656 | ||
1a4d82fc JJ |
1657 | case Instruction::Sub: { |
1658 | ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0)); | |
1659 | if (!LHSC) break; | |
1660 | const APInt &LHSV = LHSC->getValue(); | |
1661 | ||
1662 | // C1-X <u C2 -> (X|(C2-1)) == C1 | |
1663 | // iff C1 & (C2-1) == C2-1 | |
1664 | // C2 is a power of 2 | |
1665 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && | |
1666 | RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1)) | |
1667 | return new ICmpInst(ICmpInst::ICMP_EQ, | |
1668 | Builder->CreateOr(LHSI->getOperand(1), RHSV - 1), | |
1669 | LHSC); | |
1670 | ||
1671 | // C1-X >u C2 -> (X|C2) != C1 | |
1672 | // iff C1 & C2 == C2 | |
1673 | // C2+1 is a power of 2 | |
1674 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && | |
1675 | (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV) | |
1676 | return new ICmpInst(ICmpInst::ICMP_NE, | |
1677 | Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC); | |
1678 | break; | |
1679 | } | |
1680 | ||
223e47cc LB |
1681 | case Instruction::Add: |
1682 | // Fold: icmp pred (add X, C1), C2 | |
1683 | if (!ICI.isEquality()) { | |
1684 | ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); | |
1685 | if (!LHSC) break; | |
1686 | const APInt &LHSV = LHSC->getValue(); | |
1687 | ||
1688 | ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) | |
1689 | .subtract(LHSV); | |
1690 | ||
1691 | if (ICI.isSigned()) { | |
1692 | if (CR.getLower().isSignBit()) { | |
1693 | return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), | |
1a4d82fc | 1694 | Builder->getInt(CR.getUpper())); |
223e47cc LB |
1695 | } else if (CR.getUpper().isSignBit()) { |
1696 | return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), | |
1a4d82fc | 1697 | Builder->getInt(CR.getLower())); |
223e47cc LB |
1698 | } |
1699 | } else { | |
1700 | if (CR.getLower().isMinValue()) { | |
1701 | return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), | |
1a4d82fc | 1702 | Builder->getInt(CR.getUpper())); |
223e47cc LB |
1703 | } else if (CR.getUpper().isMinValue()) { |
1704 | return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), | |
1a4d82fc | 1705 | Builder->getInt(CR.getLower())); |
223e47cc LB |
1706 | } |
1707 | } | |
1a4d82fc JJ |
1708 | |
1709 | // X-C1 <u C2 -> (X & -C2) == C1 | |
1710 | // iff C1 & (C2-1) == 0 | |
1711 | // C2 is a power of 2 | |
1712 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() && | |
1713 | RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0) | |
1714 | return new ICmpInst(ICmpInst::ICMP_EQ, | |
1715 | Builder->CreateAnd(LHSI->getOperand(0), -RHSV), | |
1716 | ConstantExpr::getNeg(LHSC)); | |
1717 | ||
1718 | // X-C1 >u C2 -> (X & ~C2) != C1 | |
1719 | // iff C1 & C2 == 0 | |
1720 | // C2+1 is a power of 2 | |
1721 | if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() && | |
1722 | (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0) | |
1723 | return new ICmpInst(ICmpInst::ICMP_NE, | |
1724 | Builder->CreateAnd(LHSI->getOperand(0), ~RHSV), | |
1725 | ConstantExpr::getNeg(LHSC)); | |
223e47cc LB |
1726 | } |
1727 | break; | |
1728 | } | |
1729 | ||
1730 | // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. | |
1731 | if (ICI.isEquality()) { | |
1732 | bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; | |
1733 | ||
1734 | // If the first operand is (add|sub|and|or|xor|rem) with a constant, and | |
1735 | // the second operand is a constant, simplify a bit. | |
1736 | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { | |
1737 | switch (BO->getOpcode()) { | |
1738 | case Instruction::SRem: | |
1739 | // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. | |
1740 | if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ | |
1741 | const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); | |
1742 | if (V.sgt(1) && V.isPowerOf2()) { | |
1743 | Value *NewRem = | |
1744 | Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), | |
1745 | BO->getName()); | |
1746 | return new ICmpInst(ICI.getPredicate(), NewRem, | |
1747 | Constant::getNullValue(BO->getType())); | |
1748 | } | |
1749 | } | |
1750 | break; | |
1751 | case Instruction::Add: | |
1752 | // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. | |
1753 | if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1754 | if (BO->hasOneUse()) | |
1755 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1756 | ConstantExpr::getSub(RHS, BOp1C)); | |
1757 | } else if (RHSV == 0) { | |
1758 | // Replace ((add A, B) != 0) with (A != -B) if A or B is | |
1759 | // efficiently invertible, or if the add has just this one use. | |
1760 | Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); | |
1761 | ||
1762 | if (Value *NegVal = dyn_castNegVal(BOp1)) | |
1763 | return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); | |
1764 | if (Value *NegVal = dyn_castNegVal(BOp0)) | |
1765 | return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); | |
1766 | if (BO->hasOneUse()) { | |
1767 | Value *Neg = Builder->CreateNeg(BOp1); | |
1768 | Neg->takeName(BO); | |
1769 | return new ICmpInst(ICI.getPredicate(), BOp0, Neg); | |
1770 | } | |
1771 | } | |
1772 | break; | |
1773 | case Instruction::Xor: | |
1774 | // For the xor case, we can xor two constants together, eliminating | |
1775 | // the explicit xor. | |
1776 | if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { | |
1777 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1778 | ConstantExpr::getXor(RHS, BOC)); | |
1779 | } else if (RHSV == 0) { | |
1780 | // Replace ((xor A, B) != 0) with (A != B) | |
1781 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1782 | BO->getOperand(1)); | |
1783 | } | |
1784 | break; | |
1785 | case Instruction::Sub: | |
1786 | // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. | |
1787 | if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { | |
1788 | if (BO->hasOneUse()) | |
1789 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), | |
1790 | ConstantExpr::getSub(BOp0C, RHS)); | |
1791 | } else if (RHSV == 0) { | |
1792 | // Replace ((sub A, B) != 0) with (A != B) | |
1793 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1794 | BO->getOperand(1)); | |
1795 | } | |
1796 | break; | |
1797 | case Instruction::Or: | |
1798 | // If bits are being or'd in that are not present in the constant we | |
1799 | // are comparing against, then the comparison could never succeed! | |
1800 | if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1801 | Constant *NotCI = ConstantExpr::getNot(RHS); | |
1802 | if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) | |
1a4d82fc | 1803 | return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); |
223e47cc LB |
1804 | } |
1805 | break; | |
1806 | ||
1807 | case Instruction::And: | |
1808 | if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1809 | // If bits are being compared against that are and'd out, then the | |
1810 | // comparison can never succeed! | |
1811 | if ((RHSV & ~BOC->getValue()) != 0) | |
1a4d82fc | 1812 | return ReplaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE)); |
223e47cc LB |
1813 | |
1814 | // If we have ((X & C) == C), turn it into ((X & C) != 0). | |
1815 | if (RHS == BOC && RHSV.isPowerOf2()) | |
1816 | return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : | |
1817 | ICmpInst::ICMP_NE, LHSI, | |
1818 | Constant::getNullValue(RHS->getType())); | |
1819 | ||
1820 | // Don't perform the following transforms if the AND has multiple uses | |
1821 | if (!BO->hasOneUse()) | |
1822 | break; | |
1823 | ||
1824 | // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 | |
1825 | if (BOC->getValue().isSignBit()) { | |
1826 | Value *X = BO->getOperand(0); | |
1827 | Constant *Zero = Constant::getNullValue(X->getType()); | |
1828 | ICmpInst::Predicate pred = isICMP_NE ? | |
1829 | ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; | |
1830 | return new ICmpInst(pred, X, Zero); | |
1831 | } | |
1832 | ||
1833 | // ((X & ~7) == 0) --> X < 8 | |
1834 | if (RHSV == 0 && isHighOnes(BOC)) { | |
1835 | Value *X = BO->getOperand(0); | |
1836 | Constant *NegX = ConstantExpr::getNeg(BOC); | |
1837 | ICmpInst::Predicate pred = isICMP_NE ? | |
1838 | ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; | |
1839 | return new ICmpInst(pred, X, NegX); | |
1840 | } | |
1841 | } | |
1a4d82fc JJ |
1842 | break; |
1843 | case Instruction::Mul: | |
1844 | if (RHSV == 0 && BO->hasNoSignedWrap()) { | |
1845 | if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
1846 | // The trivial case (mul X, 0) is handled by InstSimplify | |
1847 | // General case : (mul X, C) != 0 iff X != 0 | |
1848 | // (mul X, C) == 0 iff X == 0 | |
1849 | if (!BOC->isZero()) | |
1850 | return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), | |
1851 | Constant::getNullValue(RHS->getType())); | |
1852 | } | |
1853 | } | |
1854 | break; | |
223e47cc LB |
1855 | default: break; |
1856 | } | |
1857 | } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { | |
1858 | // Handle icmp {eq|ne} <intrinsic>, intcst. | |
1859 | switch (II->getIntrinsicID()) { | |
1860 | case Intrinsic::bswap: | |
1861 | Worklist.Add(II); | |
1862 | ICI.setOperand(0, II->getArgOperand(0)); | |
1a4d82fc | 1863 | ICI.setOperand(1, Builder->getInt(RHSV.byteSwap())); |
223e47cc LB |
1864 | return &ICI; |
1865 | case Intrinsic::ctlz: | |
1866 | case Intrinsic::cttz: | |
1867 | // ctz(A) == bitwidth(a) -> A == 0 and likewise for != | |
1868 | if (RHSV == RHS->getType()->getBitWidth()) { | |
1869 | Worklist.Add(II); | |
1870 | ICI.setOperand(0, II->getArgOperand(0)); | |
1871 | ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); | |
1872 | return &ICI; | |
1873 | } | |
1874 | break; | |
1875 | case Intrinsic::ctpop: | |
1876 | // popcount(A) == 0 -> A == 0 and likewise for != | |
1877 | if (RHS->isZero()) { | |
1878 | Worklist.Add(II); | |
1879 | ICI.setOperand(0, II->getArgOperand(0)); | |
1880 | ICI.setOperand(1, RHS); | |
1881 | return &ICI; | |
1882 | } | |
1883 | break; | |
1884 | default: | |
1885 | break; | |
1886 | } | |
1887 | } | |
1888 | } | |
1a4d82fc | 1889 | return nullptr; |
223e47cc LB |
1890 | } |
1891 | ||
1892 | /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). | |
1893 | /// We only handle extending casts so far. | |
1894 | /// | |
1895 | Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { | |
1896 | const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); | |
1897 | Value *LHSCIOp = LHSCI->getOperand(0); | |
1898 | Type *SrcTy = LHSCIOp->getType(); | |
1899 | Type *DestTy = LHSCI->getType(); | |
1900 | Value *RHSCIOp; | |
1901 | ||
1902 | // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the | |
1903 | // integer type is the same size as the pointer type. | |
1a4d82fc JJ |
1904 | if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && |
1905 | DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { | |
1906 | Value *RHSOp = nullptr; | |
223e47cc LB |
1907 | if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { |
1908 | RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); | |
1909 | } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { | |
1910 | RHSOp = RHSC->getOperand(0); | |
1911 | // If the pointer types don't match, insert a bitcast. | |
1912 | if (LHSCIOp->getType() != RHSOp->getType()) | |
1913 | RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); | |
1914 | } | |
1915 | ||
1916 | if (RHSOp) | |
1917 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); | |
1918 | } | |
1919 | ||
1920 | // The code below only handles extension cast instructions, so far. | |
1921 | // Enforce this. | |
1922 | if (LHSCI->getOpcode() != Instruction::ZExt && | |
1923 | LHSCI->getOpcode() != Instruction::SExt) | |
1a4d82fc | 1924 | return nullptr; |
223e47cc LB |
1925 | |
1926 | bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; | |
1927 | bool isSignedCmp = ICI.isSigned(); | |
1928 | ||
1929 | if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { | |
1930 | // Not an extension from the same type? | |
1931 | RHSCIOp = CI->getOperand(0); | |
1932 | if (RHSCIOp->getType() != LHSCIOp->getType()) | |
1a4d82fc | 1933 | return nullptr; |
223e47cc LB |
1934 | |
1935 | // If the signedness of the two casts doesn't agree (i.e. one is a sext | |
1936 | // and the other is a zext), then we can't handle this. | |
1937 | if (CI->getOpcode() != LHSCI->getOpcode()) | |
1a4d82fc | 1938 | return nullptr; |
223e47cc LB |
1939 | |
1940 | // Deal with equality cases early. | |
1941 | if (ICI.isEquality()) | |
1942 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); | |
1943 | ||
1944 | // A signed comparison of sign extended values simplifies into a | |
1945 | // signed comparison. | |
1946 | if (isSignedCmp && isSignedExt) | |
1947 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); | |
1948 | ||
1949 | // The other three cases all fold into an unsigned comparison. | |
1950 | return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); | |
1951 | } | |
1952 | ||
1953 | // If we aren't dealing with a constant on the RHS, exit early | |
1954 | ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); | |
1955 | if (!CI) | |
1a4d82fc | 1956 | return nullptr; |
223e47cc LB |
1957 | |
1958 | // Compute the constant that would happen if we truncated to SrcTy then | |
1959 | // reextended to DestTy. | |
1960 | Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); | |
1961 | Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), | |
1962 | Res1, DestTy); | |
1963 | ||
1964 | // If the re-extended constant didn't change... | |
1965 | if (Res2 == CI) { | |
1966 | // Deal with equality cases early. | |
1967 | if (ICI.isEquality()) | |
1968 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); | |
1969 | ||
1970 | // A signed comparison of sign extended values simplifies into a | |
1971 | // signed comparison. | |
1972 | if (isSignedExt && isSignedCmp) | |
1973 | return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); | |
1974 | ||
1975 | // The other three cases all fold into an unsigned comparison. | |
1976 | return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); | |
1977 | } | |
1978 | ||
1979 | // The re-extended constant changed so the constant cannot be represented | |
1980 | // in the shorter type. Consequently, we cannot emit a simple comparison. | |
1981 | // All the cases that fold to true or false will have already been handled | |
1982 | // by SimplifyICmpInst, so only deal with the tricky case. | |
1983 | ||
1984 | if (isSignedCmp || !isSignedExt) | |
1a4d82fc | 1985 | return nullptr; |
223e47cc LB |
1986 | |
1987 | // Evaluate the comparison for LT (we invert for GT below). LE and GE cases | |
1988 | // should have been folded away previously and not enter in here. | |
1989 | ||
1990 | // We're performing an unsigned comp with a sign extended value. | |
1991 | // This is true if the input is >= 0. [aka >s -1] | |
1992 | Constant *NegOne = Constant::getAllOnesValue(SrcTy); | |
1993 | Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); | |
1994 | ||
1995 | // Finally, return the value computed. | |
1996 | if (ICI.getPredicate() == ICmpInst::ICMP_ULT) | |
1997 | return ReplaceInstUsesWith(ICI, Result); | |
1998 | ||
1999 | assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); | |
2000 | return BinaryOperator::CreateNot(Result); | |
2001 | } | |
2002 | ||
2003 | /// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form: | |
2004 | /// I = icmp ugt (add (add A, B), CI2), CI1 | |
2005 | /// If this is of the form: | |
2006 | /// sum = a + b | |
2007 | /// if (sum+128 >u 255) | |
2008 | /// Then replace it with llvm.sadd.with.overflow.i8. | |
2009 | /// | |
2010 | static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, | |
2011 | ConstantInt *CI2, ConstantInt *CI1, | |
2012 | InstCombiner &IC) { | |
2013 | // The transformation we're trying to do here is to transform this into an | |
2014 | // llvm.sadd.with.overflow. To do this, we have to replace the original add | |
2015 | // with a narrower add, and discard the add-with-constant that is part of the | |
2016 | // range check (if we can't eliminate it, this isn't profitable). | |
2017 | ||
2018 | // In order to eliminate the add-with-constant, the compare can be its only | |
2019 | // use. | |
2020 | Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); | |
1a4d82fc | 2021 | if (!AddWithCst->hasOneUse()) return nullptr; |
223e47cc LB |
2022 | |
2023 | // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. | |
1a4d82fc | 2024 | if (!CI2->getValue().isPowerOf2()) return nullptr; |
223e47cc | 2025 | unsigned NewWidth = CI2->getValue().countTrailingZeros(); |
1a4d82fc | 2026 | if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr; |
223e47cc LB |
2027 | |
2028 | // The width of the new add formed is 1 more than the bias. | |
2029 | ++NewWidth; | |
2030 | ||
2031 | // Check to see that CI1 is an all-ones value with NewWidth bits. | |
2032 | if (CI1->getBitWidth() == NewWidth || | |
2033 | CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) | |
1a4d82fc | 2034 | return nullptr; |
223e47cc LB |
2035 | |
2036 | // This is only really a signed overflow check if the inputs have been | |
2037 | // sign-extended; check for that condition. For example, if CI2 is 2^31 and | |
2038 | // the operands of the add are 64 bits wide, we need at least 33 sign bits. | |
2039 | unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; | |
1a4d82fc JJ |
2040 | if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits || |
2041 | IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits) | |
2042 | return nullptr; | |
223e47cc LB |
2043 | |
2044 | // In order to replace the original add with a narrower | |
2045 | // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant | |
2046 | // and truncates that discard the high bits of the add. Verify that this is | |
2047 | // the case. | |
2048 | Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0)); | |
1a4d82fc JJ |
2049 | for (User *U : OrigAdd->users()) { |
2050 | if (U == AddWithCst) continue; | |
223e47cc LB |
2051 | |
2052 | // Only accept truncates for now. We would really like a nice recursive | |
2053 | // predicate like SimplifyDemandedBits, but which goes downwards the use-def | |
2054 | // chain to see which bits of a value are actually demanded. If the | |
2055 | // original add had another add which was then immediately truncated, we | |
2056 | // could still do the transformation. | |
1a4d82fc JJ |
2057 | TruncInst *TI = dyn_cast<TruncInst>(U); |
2058 | if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) | |
2059 | return nullptr; | |
223e47cc LB |
2060 | } |
2061 | ||
2062 | // If the pattern matches, truncate the inputs to the narrower type and | |
2063 | // use the sadd_with_overflow intrinsic to efficiently compute both the | |
2064 | // result and the overflow bit. | |
2065 | Module *M = I.getParent()->getParent()->getParent(); | |
2066 | ||
2067 | Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); | |
2068 | Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, | |
2069 | NewType); | |
2070 | ||
2071 | InstCombiner::BuilderTy *Builder = IC.Builder; | |
2072 | ||
2073 | // Put the new code above the original add, in case there are any uses of the | |
2074 | // add between the add and the compare. | |
2075 | Builder->SetInsertPoint(OrigAdd); | |
2076 | ||
2077 | Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc"); | |
2078 | Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc"); | |
2079 | CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd"); | |
2080 | Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); | |
2081 | Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); | |
2082 | ||
2083 | // The inner add was the result of the narrow add, zero extended to the | |
2084 | // wider type. Replace it with the result computed by the intrinsic. | |
2085 | IC.ReplaceInstUsesWith(*OrigAdd, ZExt); | |
2086 | ||
2087 | // The original icmp gets replaced with the overflow value. | |
2088 | return ExtractValueInst::Create(Call, 1, "sadd.overflow"); | |
2089 | } | |
2090 | ||
2091 | static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, | |
2092 | InstCombiner &IC) { | |
2093 | // Don't bother doing this transformation for pointers, don't do it for | |
2094 | // vectors. | |
1a4d82fc | 2095 | if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; |
223e47cc LB |
2096 | |
2097 | // If the add is a constant expr, then we don't bother transforming it. | |
2098 | Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); | |
1a4d82fc | 2099 | if (!OrigAdd) return nullptr; |
223e47cc LB |
2100 | |
2101 | Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); | |
2102 | ||
2103 | // Put the new code above the original add, in case there are any uses of the | |
2104 | // add between the add and the compare. | |
2105 | InstCombiner::BuilderTy *Builder = IC.Builder; | |
2106 | Builder->SetInsertPoint(OrigAdd); | |
2107 | ||
2108 | Module *M = I.getParent()->getParent()->getParent(); | |
2109 | Type *Ty = LHS->getType(); | |
2110 | Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); | |
2111 | CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); | |
2112 | Value *Add = Builder->CreateExtractValue(Call, 0); | |
2113 | ||
2114 | IC.ReplaceInstUsesWith(*OrigAdd, Add); | |
2115 | ||
2116 | // The original icmp gets replaced with the overflow value. | |
2117 | return ExtractValueInst::Create(Call, 1, "uadd.overflow"); | |
2118 | } | |
2119 | ||
1a4d82fc JJ |
2120 | /// \brief Recognize and process idiom involving test for multiplication |
2121 | /// overflow. | |
2122 | /// | |
2123 | /// The caller has matched a pattern of the form: | |
2124 | /// I = cmp u (mul(zext A, zext B), V | |
2125 | /// The function checks if this is a test for overflow and if so replaces | |
2126 | /// multiplication with call to 'mul.with.overflow' intrinsic. | |
2127 | /// | |
2128 | /// \param I Compare instruction. | |
2129 | /// \param MulVal Result of 'mult' instruction. It is one of the arguments of | |
2130 | /// the compare instruction. Must be of integer type. | |
2131 | /// \param OtherVal The other argument of compare instruction. | |
2132 | /// \returns Instruction which must replace the compare instruction, NULL if no | |
2133 | /// replacement required. | |
2134 | static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, | |
2135 | Value *OtherVal, InstCombiner &IC) { | |
2136 | // Don't bother doing this transformation for pointers, don't do it for | |
2137 | // vectors. | |
2138 | if (!isa<IntegerType>(MulVal->getType())) | |
2139 | return nullptr; | |
2140 | ||
2141 | assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); | |
2142 | assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); | |
2143 | Instruction *MulInstr = cast<Instruction>(MulVal); | |
2144 | assert(MulInstr->getOpcode() == Instruction::Mul); | |
2145 | ||
2146 | Instruction *LHS = cast<Instruction>(MulInstr->getOperand(0)), | |
2147 | *RHS = cast<Instruction>(MulInstr->getOperand(1)); | |
2148 | assert(LHS->getOpcode() == Instruction::ZExt); | |
2149 | assert(RHS->getOpcode() == Instruction::ZExt); | |
2150 | Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); | |
2151 | ||
2152 | // Calculate type and width of the result produced by mul.with.overflow. | |
2153 | Type *TyA = A->getType(), *TyB = B->getType(); | |
2154 | unsigned WidthA = TyA->getPrimitiveSizeInBits(), | |
2155 | WidthB = TyB->getPrimitiveSizeInBits(); | |
2156 | unsigned MulWidth; | |
2157 | Type *MulType; | |
2158 | if (WidthB > WidthA) { | |
2159 | MulWidth = WidthB; | |
2160 | MulType = TyB; | |
2161 | } else { | |
2162 | MulWidth = WidthA; | |
2163 | MulType = TyA; | |
2164 | } | |
2165 | ||
2166 | // In order to replace the original mul with a narrower mul.with.overflow, | |
2167 | // all uses must ignore upper bits of the product. The number of used low | |
2168 | // bits must be not greater than the width of mul.with.overflow. | |
2169 | if (MulVal->hasNUsesOrMore(2)) | |
2170 | for (User *U : MulVal->users()) { | |
2171 | if (U == &I) | |
2172 | continue; | |
2173 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { | |
2174 | // Check if truncation ignores bits above MulWidth. | |
2175 | unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); | |
2176 | if (TruncWidth > MulWidth) | |
2177 | return nullptr; | |
2178 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { | |
2179 | // Check if AND ignores bits above MulWidth. | |
2180 | if (BO->getOpcode() != Instruction::And) | |
2181 | return nullptr; | |
2182 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { | |
2183 | const APInt &CVal = CI->getValue(); | |
2184 | if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) | |
2185 | return nullptr; | |
2186 | } | |
2187 | } else { | |
2188 | // Other uses prohibit this transformation. | |
2189 | return nullptr; | |
2190 | } | |
2191 | } | |
2192 | ||
2193 | // Recognize patterns | |
2194 | switch (I.getPredicate()) { | |
2195 | case ICmpInst::ICMP_EQ: | |
2196 | case ICmpInst::ICMP_NE: | |
2197 | // Recognize pattern: | |
2198 | // mulval = mul(zext A, zext B) | |
2199 | // cmp eq/neq mulval, zext trunc mulval | |
2200 | if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal)) | |
2201 | if (Zext->hasOneUse()) { | |
2202 | Value *ZextArg = Zext->getOperand(0); | |
2203 | if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg)) | |
2204 | if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) | |
2205 | break; //Recognized | |
2206 | } | |
2207 | ||
2208 | // Recognize pattern: | |
2209 | // mulval = mul(zext A, zext B) | |
2210 | // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. | |
2211 | ConstantInt *CI; | |
2212 | Value *ValToMask; | |
2213 | if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { | |
2214 | if (ValToMask != MulVal) | |
2215 | return nullptr; | |
2216 | const APInt &CVal = CI->getValue() + 1; | |
2217 | if (CVal.isPowerOf2()) { | |
2218 | unsigned MaskWidth = CVal.logBase2(); | |
2219 | if (MaskWidth == MulWidth) | |
2220 | break; // Recognized | |
2221 | } | |
2222 | } | |
2223 | return nullptr; | |
2224 | ||
2225 | case ICmpInst::ICMP_UGT: | |
2226 | // Recognize pattern: | |
2227 | // mulval = mul(zext A, zext B) | |
2228 | // cmp ugt mulval, max | |
2229 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2230 | APInt MaxVal = APInt::getMaxValue(MulWidth); | |
2231 | MaxVal = MaxVal.zext(CI->getBitWidth()); | |
2232 | if (MaxVal.eq(CI->getValue())) | |
2233 | break; // Recognized | |
2234 | } | |
2235 | return nullptr; | |
2236 | ||
2237 | case ICmpInst::ICMP_UGE: | |
2238 | // Recognize pattern: | |
2239 | // mulval = mul(zext A, zext B) | |
2240 | // cmp uge mulval, max+1 | |
2241 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2242 | APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); | |
2243 | if (MaxVal.eq(CI->getValue())) | |
2244 | break; // Recognized | |
2245 | } | |
2246 | return nullptr; | |
2247 | ||
2248 | case ICmpInst::ICMP_ULE: | |
2249 | // Recognize pattern: | |
2250 | // mulval = mul(zext A, zext B) | |
2251 | // cmp ule mulval, max | |
2252 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2253 | APInt MaxVal = APInt::getMaxValue(MulWidth); | |
2254 | MaxVal = MaxVal.zext(CI->getBitWidth()); | |
2255 | if (MaxVal.eq(CI->getValue())) | |
2256 | break; // Recognized | |
2257 | } | |
2258 | return nullptr; | |
2259 | ||
2260 | case ICmpInst::ICMP_ULT: | |
2261 | // Recognize pattern: | |
2262 | // mulval = mul(zext A, zext B) | |
2263 | // cmp ule mulval, max + 1 | |
2264 | if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { | |
2265 | APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); | |
2266 | if (MaxVal.eq(CI->getValue())) | |
2267 | break; // Recognized | |
2268 | } | |
2269 | return nullptr; | |
2270 | ||
2271 | default: | |
2272 | return nullptr; | |
2273 | } | |
2274 | ||
2275 | InstCombiner::BuilderTy *Builder = IC.Builder; | |
2276 | Builder->SetInsertPoint(MulInstr); | |
2277 | Module *M = I.getParent()->getParent()->getParent(); | |
2278 | ||
2279 | // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) | |
2280 | Value *MulA = A, *MulB = B; | |
2281 | if (WidthA < MulWidth) | |
2282 | MulA = Builder->CreateZExt(A, MulType); | |
2283 | if (WidthB < MulWidth) | |
2284 | MulB = Builder->CreateZExt(B, MulType); | |
2285 | Value *F = | |
2286 | Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType); | |
2287 | CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul"); | |
2288 | IC.Worklist.Add(MulInstr); | |
2289 | ||
2290 | // If there are uses of mul result other than the comparison, we know that | |
2291 | // they are truncation or binary AND. Change them to use result of | |
2292 | // mul.with.overflow and adjust properly mask/size. | |
2293 | if (MulVal->hasNUsesOrMore(2)) { | |
2294 | Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); | |
2295 | for (User *U : MulVal->users()) { | |
2296 | if (U == &I || U == OtherVal) | |
2297 | continue; | |
2298 | if (TruncInst *TI = dyn_cast<TruncInst>(U)) { | |
2299 | if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) | |
2300 | IC.ReplaceInstUsesWith(*TI, Mul); | |
2301 | else | |
2302 | TI->setOperand(0, Mul); | |
2303 | } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { | |
2304 | assert(BO->getOpcode() == Instruction::And); | |
2305 | // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) | |
2306 | ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); | |
2307 | APInt ShortMask = CI->getValue().trunc(MulWidth); | |
2308 | Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); | |
2309 | Instruction *Zext = | |
2310 | cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); | |
2311 | IC.Worklist.Add(Zext); | |
2312 | IC.ReplaceInstUsesWith(*BO, Zext); | |
2313 | } else { | |
2314 | llvm_unreachable("Unexpected Binary operation"); | |
2315 | } | |
2316 | IC.Worklist.Add(cast<Instruction>(U)); | |
2317 | } | |
2318 | } | |
2319 | if (isa<Instruction>(OtherVal)) | |
2320 | IC.Worklist.Add(cast<Instruction>(OtherVal)); | |
2321 | ||
2322 | // The original icmp gets replaced with the overflow value, maybe inverted | |
2323 | // depending on predicate. | |
2324 | bool Inverse = false; | |
2325 | switch (I.getPredicate()) { | |
2326 | case ICmpInst::ICMP_NE: | |
2327 | break; | |
2328 | case ICmpInst::ICMP_EQ: | |
2329 | Inverse = true; | |
2330 | break; | |
2331 | case ICmpInst::ICMP_UGT: | |
2332 | case ICmpInst::ICMP_UGE: | |
2333 | if (I.getOperand(0) == MulVal) | |
2334 | break; | |
2335 | Inverse = true; | |
2336 | break; | |
2337 | case ICmpInst::ICMP_ULT: | |
2338 | case ICmpInst::ICMP_ULE: | |
2339 | if (I.getOperand(1) == MulVal) | |
2340 | break; | |
2341 | Inverse = true; | |
2342 | break; | |
2343 | default: | |
2344 | llvm_unreachable("Unexpected predicate"); | |
2345 | } | |
2346 | if (Inverse) { | |
2347 | Value *Res = Builder->CreateExtractValue(Call, 1); | |
2348 | return BinaryOperator::CreateNot(Res); | |
2349 | } | |
2350 | ||
2351 | return ExtractValueInst::Create(Call, 1); | |
2352 | } | |
2353 | ||
223e47cc LB |
2354 | // DemandedBitsLHSMask - When performing a comparison against a constant, |
2355 | // it is possible that not all the bits in the LHS are demanded. This helper | |
2356 | // method computes the mask that IS demanded. | |
2357 | static APInt DemandedBitsLHSMask(ICmpInst &I, | |
2358 | unsigned BitWidth, bool isSignCheck) { | |
2359 | if (isSignCheck) | |
2360 | return APInt::getSignBit(BitWidth); | |
2361 | ||
2362 | ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); | |
2363 | if (!CI) return APInt::getAllOnesValue(BitWidth); | |
2364 | const APInt &RHS = CI->getValue(); | |
2365 | ||
2366 | switch (I.getPredicate()) { | |
2367 | // For a UGT comparison, we don't care about any bits that | |
2368 | // correspond to the trailing ones of the comparand. The value of these | |
2369 | // bits doesn't impact the outcome of the comparison, because any value | |
2370 | // greater than the RHS must differ in a bit higher than these due to carry. | |
2371 | case ICmpInst::ICMP_UGT: { | |
2372 | unsigned trailingOnes = RHS.countTrailingOnes(); | |
2373 | APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); | |
2374 | return ~lowBitsSet; | |
2375 | } | |
2376 | ||
2377 | // Similarly, for a ULT comparison, we don't care about the trailing zeros. | |
2378 | // Any value less than the RHS must differ in a higher bit because of carries. | |
2379 | case ICmpInst::ICMP_ULT: { | |
2380 | unsigned trailingZeros = RHS.countTrailingZeros(); | |
2381 | APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); | |
2382 | return ~lowBitsSet; | |
2383 | } | |
2384 | ||
2385 | default: | |
2386 | return APInt::getAllOnesValue(BitWidth); | |
2387 | } | |
2388 | ||
2389 | } | |
2390 | ||
1a4d82fc JJ |
2391 | /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst |
2392 | /// should be swapped. | |
2393 | /// The decision is based on how many times these two operands are reused | |
2394 | /// as subtract operands and their positions in those instructions. | |
2395 | /// The rational is that several architectures use the same instruction for | |
2396 | /// both subtract and cmp, thus it is better if the order of those operands | |
2397 | /// match. | |
2398 | /// \return true if Op0 and Op1 should be swapped. | |
2399 | static bool swapMayExposeCSEOpportunities(const Value * Op0, | |
2400 | const Value * Op1) { | |
2401 | // Filter out pointer value as those cannot appears directly in subtract. | |
2402 | // FIXME: we may want to go through inttoptrs or bitcasts. | |
2403 | if (Op0->getType()->isPointerTy()) | |
2404 | return false; | |
2405 | // Count every uses of both Op0 and Op1 in a subtract. | |
2406 | // Each time Op0 is the first operand, count -1: swapping is bad, the | |
2407 | // subtract has already the same layout as the compare. | |
2408 | // Each time Op0 is the second operand, count +1: swapping is good, the | |
2409 | // subtract has a different layout as the compare. | |
2410 | // At the end, if the benefit is greater than 0, Op0 should come second to | |
2411 | // expose more CSE opportunities. | |
2412 | int GlobalSwapBenefits = 0; | |
2413 | for (const User *U : Op0->users()) { | |
2414 | const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U); | |
2415 | if (!BinOp || BinOp->getOpcode() != Instruction::Sub) | |
2416 | continue; | |
2417 | // If Op0 is the first argument, this is not beneficial to swap the | |
2418 | // arguments. | |
2419 | int LocalSwapBenefits = -1; | |
2420 | unsigned Op1Idx = 1; | |
2421 | if (BinOp->getOperand(Op1Idx) == Op0) { | |
2422 | Op1Idx = 0; | |
2423 | LocalSwapBenefits = 1; | |
2424 | } | |
2425 | if (BinOp->getOperand(Op1Idx) != Op1) | |
2426 | continue; | |
2427 | GlobalSwapBenefits += LocalSwapBenefits; | |
2428 | } | |
2429 | return GlobalSwapBenefits > 0; | |
2430 | } | |
2431 | ||
223e47cc LB |
2432 | Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { |
2433 | bool Changed = false; | |
2434 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | |
1a4d82fc JJ |
2435 | unsigned Op0Cplxity = getComplexity(Op0); |
2436 | unsigned Op1Cplxity = getComplexity(Op1); | |
223e47cc LB |
2437 | |
2438 | /// Orders the operands of the compare so that they are listed from most | |
2439 | /// complex to least complex. This puts constants before unary operators, | |
2440 | /// before binary operators. | |
1a4d82fc JJ |
2441 | if (Op0Cplxity < Op1Cplxity || |
2442 | (Op0Cplxity == Op1Cplxity && | |
2443 | swapMayExposeCSEOpportunities(Op0, Op1))) { | |
223e47cc LB |
2444 | I.swapOperands(); |
2445 | std::swap(Op0, Op1); | |
2446 | Changed = true; | |
2447 | } | |
2448 | ||
1a4d82fc | 2449 | if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AT)) |
223e47cc LB |
2450 | return ReplaceInstUsesWith(I, V); |
2451 | ||
2452 | // comparing -val or val with non-zero is the same as just comparing val | |
2453 | // ie, abs(val) != 0 -> val != 0 | |
2454 | if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) | |
2455 | { | |
2456 | Value *Cond, *SelectTrue, *SelectFalse; | |
2457 | if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), | |
2458 | m_Value(SelectFalse)))) { | |
2459 | if (Value *V = dyn_castNegVal(SelectTrue)) { | |
2460 | if (V == SelectFalse) | |
2461 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); | |
2462 | } | |
2463 | else if (Value *V = dyn_castNegVal(SelectFalse)) { | |
2464 | if (V == SelectTrue) | |
2465 | return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); | |
2466 | } | |
2467 | } | |
2468 | } | |
2469 | ||
2470 | Type *Ty = Op0->getType(); | |
2471 | ||
2472 | // icmp's with boolean values can always be turned into bitwise operations | |
2473 | if (Ty->isIntegerTy(1)) { | |
2474 | switch (I.getPredicate()) { | |
2475 | default: llvm_unreachable("Invalid icmp instruction!"); | |
2476 | case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) | |
2477 | Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); | |
2478 | return BinaryOperator::CreateNot(Xor); | |
2479 | } | |
2480 | case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B | |
2481 | return BinaryOperator::CreateXor(Op0, Op1); | |
2482 | ||
2483 | case ICmpInst::ICMP_UGT: | |
2484 | std::swap(Op0, Op1); // Change icmp ugt -> icmp ult | |
2485 | // FALL THROUGH | |
2486 | case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B | |
2487 | Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); | |
2488 | return BinaryOperator::CreateAnd(Not, Op1); | |
2489 | } | |
2490 | case ICmpInst::ICMP_SGT: | |
2491 | std::swap(Op0, Op1); // Change icmp sgt -> icmp slt | |
2492 | // FALL THROUGH | |
2493 | case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B | |
2494 | Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); | |
2495 | return BinaryOperator::CreateAnd(Not, Op0); | |
2496 | } | |
2497 | case ICmpInst::ICMP_UGE: | |
2498 | std::swap(Op0, Op1); // Change icmp uge -> icmp ule | |
2499 | // FALL THROUGH | |
2500 | case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B | |
2501 | Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); | |
2502 | return BinaryOperator::CreateOr(Not, Op1); | |
2503 | } | |
2504 | case ICmpInst::ICMP_SGE: | |
2505 | std::swap(Op0, Op1); // Change icmp sge -> icmp sle | |
2506 | // FALL THROUGH | |
2507 | case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B | |
2508 | Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); | |
2509 | return BinaryOperator::CreateOr(Not, Op0); | |
2510 | } | |
2511 | } | |
2512 | } | |
2513 | ||
2514 | unsigned BitWidth = 0; | |
2515 | if (Ty->isIntOrIntVectorTy()) | |
2516 | BitWidth = Ty->getScalarSizeInBits(); | |
1a4d82fc JJ |
2517 | else if (DL) // Pointers require DL info to get their size. |
2518 | BitWidth = DL->getTypeSizeInBits(Ty->getScalarType()); | |
223e47cc LB |
2519 | |
2520 | bool isSignBit = false; | |
2521 | ||
2522 | // See if we are doing a comparison with a constant. | |
2523 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
1a4d82fc | 2524 | Value *A = nullptr, *B = nullptr; |
223e47cc LB |
2525 | |
2526 | // Match the following pattern, which is a common idiom when writing | |
2527 | // overflow-safe integer arithmetic function. The source performs an | |
2528 | // addition in wider type, and explicitly checks for overflow using | |
2529 | // comparisons against INT_MIN and INT_MAX. Simplify this by using the | |
2530 | // sadd_with_overflow intrinsic. | |
2531 | // | |
2532 | // TODO: This could probably be generalized to handle other overflow-safe | |
2533 | // operations if we worked out the formulas to compute the appropriate | |
2534 | // magic constants. | |
2535 | // | |
2536 | // sum = a + b | |
2537 | // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 | |
2538 | { | |
2539 | ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI | |
2540 | if (I.getPredicate() == ICmpInst::ICMP_UGT && | |
2541 | match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) | |
2542 | if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) | |
2543 | return Res; | |
2544 | } | |
2545 | ||
2546 | // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) | |
2547 | if (I.isEquality() && CI->isZero() && | |
2548 | match(Op0, m_Sub(m_Value(A), m_Value(B)))) { | |
2549 | // (icmp cond A B) if cond is equality | |
2550 | return new ICmpInst(I.getPredicate(), A, B); | |
2551 | } | |
2552 | ||
2553 | // If we have an icmp le or icmp ge instruction, turn it into the | |
2554 | // appropriate icmp lt or icmp gt instruction. This allows us to rely on | |
2555 | // them being folded in the code below. The SimplifyICmpInst code has | |
2556 | // already handled the edge cases for us, so we just assert on them. | |
2557 | switch (I.getPredicate()) { | |
2558 | default: break; | |
2559 | case ICmpInst::ICMP_ULE: | |
2560 | assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE | |
2561 | return new ICmpInst(ICmpInst::ICMP_ULT, Op0, | |
1a4d82fc | 2562 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2563 | case ICmpInst::ICMP_SLE: |
2564 | assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE | |
2565 | return new ICmpInst(ICmpInst::ICMP_SLT, Op0, | |
1a4d82fc | 2566 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2567 | case ICmpInst::ICMP_UGE: |
2568 | assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE | |
2569 | return new ICmpInst(ICmpInst::ICMP_UGT, Op0, | |
1a4d82fc | 2570 | Builder->getInt(CI->getValue()-1)); |
223e47cc LB |
2571 | case ICmpInst::ICMP_SGE: |
2572 | assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE | |
2573 | return new ICmpInst(ICmpInst::ICMP_SGT, Op0, | |
1a4d82fc JJ |
2574 | Builder->getInt(CI->getValue()-1)); |
2575 | } | |
2576 | ||
2577 | // (icmp eq/ne (ashr/lshr const2, A), const1) | |
2578 | if (I.isEquality()) { | |
2579 | ConstantInt *CI2; | |
2580 | if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) || | |
2581 | match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) { | |
2582 | return FoldICmpCstShrCst(I, Op0, A, CI, CI2); | |
2583 | } | |
223e47cc LB |
2584 | } |
2585 | ||
2586 | // If this comparison is a normal comparison, it demands all | |
2587 | // bits, if it is a sign bit comparison, it only demands the sign bit. | |
2588 | bool UnusedBit; | |
2589 | isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); | |
2590 | } | |
2591 | ||
2592 | // See if we can fold the comparison based on range information we can get | |
2593 | // by checking whether bits are known to be zero or one in the input. | |
2594 | if (BitWidth != 0) { | |
2595 | APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); | |
2596 | APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); | |
2597 | ||
2598 | if (SimplifyDemandedBits(I.getOperandUse(0), | |
2599 | DemandedBitsLHSMask(I, BitWidth, isSignBit), | |
2600 | Op0KnownZero, Op0KnownOne, 0)) | |
2601 | return &I; | |
2602 | if (SimplifyDemandedBits(I.getOperandUse(1), | |
2603 | APInt::getAllOnesValue(BitWidth), | |
2604 | Op1KnownZero, Op1KnownOne, 0)) | |
2605 | return &I; | |
2606 | ||
2607 | // Given the known and unknown bits, compute a range that the LHS could be | |
2608 | // in. Compute the Min, Max and RHS values based on the known bits. For the | |
2609 | // EQ and NE we use unsigned values. | |
2610 | APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); | |
2611 | APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); | |
2612 | if (I.isSigned()) { | |
2613 | ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, | |
2614 | Op0Min, Op0Max); | |
2615 | ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, | |
2616 | Op1Min, Op1Max); | |
2617 | } else { | |
2618 | ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, | |
2619 | Op0Min, Op0Max); | |
2620 | ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, | |
2621 | Op1Min, Op1Max); | |
2622 | } | |
2623 | ||
2624 | // If Min and Max are known to be the same, then SimplifyDemandedBits | |
2625 | // figured out that the LHS is a constant. Just constant fold this now so | |
2626 | // that code below can assume that Min != Max. | |
2627 | if (!isa<Constant>(Op0) && Op0Min == Op0Max) | |
2628 | return new ICmpInst(I.getPredicate(), | |
2629 | ConstantInt::get(Op0->getType(), Op0Min), Op1); | |
2630 | if (!isa<Constant>(Op1) && Op1Min == Op1Max) | |
2631 | return new ICmpInst(I.getPredicate(), Op0, | |
2632 | ConstantInt::get(Op1->getType(), Op1Min)); | |
2633 | ||
2634 | // Based on the range information we know about the LHS, see if we can | |
2635 | // simplify this comparison. For example, (x&4) < 8 is always true. | |
2636 | switch (I.getPredicate()) { | |
2637 | default: llvm_unreachable("Unknown icmp opcode!"); | |
2638 | case ICmpInst::ICMP_EQ: { | |
2639 | if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) | |
2640 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2641 | ||
2642 | // If all bits are known zero except for one, then we know at most one | |
2643 | // bit is set. If the comparison is against zero, then this is a check | |
2644 | // to see if *that* bit is set. | |
2645 | APInt Op0KnownZeroInverted = ~Op0KnownZero; | |
1a4d82fc | 2646 | if (~Op1KnownZero == 0) { |
223e47cc | 2647 | // If the LHS is an AND with the same constant, look through it. |
1a4d82fc JJ |
2648 | Value *LHS = nullptr; |
2649 | ConstantInt *LHSC = nullptr; | |
223e47cc LB |
2650 | if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || |
2651 | LHSC->getValue() != Op0KnownZeroInverted) | |
2652 | LHS = Op0; | |
2653 | ||
2654 | // If the LHS is 1 << x, and we know the result is a power of 2 like 8, | |
2655 | // then turn "((1 << x)&8) == 0" into "x != 3". | |
1a4d82fc JJ |
2656 | // or turn "((1 << x)&7) == 0" into "x > 2". |
2657 | Value *X = nullptr; | |
223e47cc | 2658 | if (match(LHS, m_Shl(m_One(), m_Value(X)))) { |
1a4d82fc JJ |
2659 | APInt ValToCheck = Op0KnownZeroInverted; |
2660 | if (ValToCheck.isPowerOf2()) { | |
2661 | unsigned CmpVal = ValToCheck.countTrailingZeros(); | |
2662 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
2663 | ConstantInt::get(X->getType(), CmpVal)); | |
2664 | } else if ((++ValToCheck).isPowerOf2()) { | |
2665 | unsigned CmpVal = ValToCheck.countTrailingZeros() - 1; | |
2666 | return new ICmpInst(ICmpInst::ICMP_UGT, X, | |
2667 | ConstantInt::get(X->getType(), CmpVal)); | |
2668 | } | |
223e47cc LB |
2669 | } |
2670 | ||
2671 | // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, | |
2672 | // then turn "((8 >>u x)&1) == 0" into "x != 3". | |
2673 | const APInt *CI; | |
2674 | if (Op0KnownZeroInverted == 1 && | |
2675 | match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) | |
2676 | return new ICmpInst(ICmpInst::ICMP_NE, X, | |
2677 | ConstantInt::get(X->getType(), | |
2678 | CI->countTrailingZeros())); | |
2679 | } | |
2680 | ||
2681 | break; | |
2682 | } | |
2683 | case ICmpInst::ICMP_NE: { | |
2684 | if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) | |
2685 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2686 | ||
2687 | // If all bits are known zero except for one, then we know at most one | |
2688 | // bit is set. If the comparison is against zero, then this is a check | |
2689 | // to see if *that* bit is set. | |
2690 | APInt Op0KnownZeroInverted = ~Op0KnownZero; | |
1a4d82fc | 2691 | if (~Op1KnownZero == 0) { |
223e47cc | 2692 | // If the LHS is an AND with the same constant, look through it. |
1a4d82fc JJ |
2693 | Value *LHS = nullptr; |
2694 | ConstantInt *LHSC = nullptr; | |
223e47cc LB |
2695 | if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || |
2696 | LHSC->getValue() != Op0KnownZeroInverted) | |
2697 | LHS = Op0; | |
2698 | ||
2699 | // If the LHS is 1 << x, and we know the result is a power of 2 like 8, | |
2700 | // then turn "((1 << x)&8) != 0" into "x == 3". | |
1a4d82fc JJ |
2701 | // or turn "((1 << x)&7) != 0" into "x < 3". |
2702 | Value *X = nullptr; | |
223e47cc | 2703 | if (match(LHS, m_Shl(m_One(), m_Value(X)))) { |
1a4d82fc JJ |
2704 | APInt ValToCheck = Op0KnownZeroInverted; |
2705 | if (ValToCheck.isPowerOf2()) { | |
2706 | unsigned CmpVal = ValToCheck.countTrailingZeros(); | |
2707 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
2708 | ConstantInt::get(X->getType(), CmpVal)); | |
2709 | } else if ((++ValToCheck).isPowerOf2()) { | |
2710 | unsigned CmpVal = ValToCheck.countTrailingZeros(); | |
2711 | return new ICmpInst(ICmpInst::ICMP_ULT, X, | |
2712 | ConstantInt::get(X->getType(), CmpVal)); | |
2713 | } | |
223e47cc LB |
2714 | } |
2715 | ||
2716 | // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, | |
2717 | // then turn "((8 >>u x)&1) != 0" into "x == 3". | |
2718 | const APInt *CI; | |
2719 | if (Op0KnownZeroInverted == 1 && | |
2720 | match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) | |
2721 | return new ICmpInst(ICmpInst::ICMP_EQ, X, | |
2722 | ConstantInt::get(X->getType(), | |
2723 | CI->countTrailingZeros())); | |
2724 | } | |
2725 | ||
2726 | break; | |
2727 | } | |
2728 | case ICmpInst::ICMP_ULT: | |
2729 | if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) | |
2730 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2731 | if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) | |
2732 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2733 | if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) | |
2734 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2735 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2736 | if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C | |
2737 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2738 | Builder->getInt(CI->getValue()-1)); |
223e47cc LB |
2739 | |
2740 | // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear | |
2741 | if (CI->isMinValue(true)) | |
2742 | return new ICmpInst(ICmpInst::ICMP_SGT, Op0, | |
2743 | Constant::getAllOnesValue(Op0->getType())); | |
2744 | } | |
2745 | break; | |
2746 | case ICmpInst::ICMP_UGT: | |
2747 | if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) | |
2748 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2749 | if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) | |
2750 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2751 | ||
2752 | if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) | |
2753 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2754 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2755 | if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C | |
2756 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2757 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2758 | |
2759 | // (x >u 2147483647) -> (x <s 0) -> true if sign bit set | |
2760 | if (CI->isMaxValue(true)) | |
2761 | return new ICmpInst(ICmpInst::ICMP_SLT, Op0, | |
2762 | Constant::getNullValue(Op0->getType())); | |
2763 | } | |
2764 | break; | |
2765 | case ICmpInst::ICMP_SLT: | |
2766 | if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) | |
2767 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2768 | if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) | |
2769 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2770 | if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) | |
2771 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2772 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2773 | if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C | |
2774 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2775 | Builder->getInt(CI->getValue()-1)); |
223e47cc LB |
2776 | } |
2777 | break; | |
2778 | case ICmpInst::ICMP_SGT: | |
2779 | if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) | |
2780 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2781 | if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) | |
2782 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2783 | ||
2784 | if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) | |
2785 | return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); | |
2786 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2787 | if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C | |
2788 | return new ICmpInst(ICmpInst::ICMP_EQ, Op0, | |
1a4d82fc | 2789 | Builder->getInt(CI->getValue()+1)); |
223e47cc LB |
2790 | } |
2791 | break; | |
2792 | case ICmpInst::ICMP_SGE: | |
2793 | assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); | |
2794 | if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) | |
2795 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2796 | if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) | |
2797 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2798 | break; | |
2799 | case ICmpInst::ICMP_SLE: | |
2800 | assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); | |
2801 | if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) | |
2802 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2803 | if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) | |
2804 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2805 | break; | |
2806 | case ICmpInst::ICMP_UGE: | |
2807 | assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); | |
2808 | if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) | |
2809 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2810 | if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) | |
2811 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2812 | break; | |
2813 | case ICmpInst::ICMP_ULE: | |
2814 | assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); | |
2815 | if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) | |
2816 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
2817 | if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) | |
2818 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
2819 | break; | |
2820 | } | |
2821 | ||
2822 | // Turn a signed comparison into an unsigned one if both operands | |
2823 | // are known to have the same sign. | |
2824 | if (I.isSigned() && | |
2825 | ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || | |
2826 | (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) | |
2827 | return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); | |
2828 | } | |
2829 | ||
2830 | // Test if the ICmpInst instruction is used exclusively by a select as | |
2831 | // part of a minimum or maximum operation. If so, refrain from doing | |
2832 | // any other folding. This helps out other analyses which understand | |
2833 | // non-obfuscated minimum and maximum idioms, such as ScalarEvolution | |
2834 | // and CodeGen. And in this case, at least one of the comparison | |
2835 | // operands has at least one user besides the compare (the select), | |
2836 | // which would often largely negate the benefit of folding anyway. | |
2837 | if (I.hasOneUse()) | |
1a4d82fc | 2838 | if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin())) |
223e47cc LB |
2839 | if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || |
2840 | (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) | |
1a4d82fc | 2841 | return nullptr; |
223e47cc LB |
2842 | |
2843 | // See if we are doing a comparison between a constant and an instruction that | |
2844 | // can be folded into the comparison. | |
2845 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { | |
2846 | // Since the RHS is a ConstantInt (CI), if the left hand side is an | |
2847 | // instruction, see if that instruction also has constants so that the | |
2848 | // instruction can be folded into the icmp | |
2849 | if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) | |
2850 | if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) | |
2851 | return Res; | |
2852 | } | |
2853 | ||
2854 | // Handle icmp with constant (but not simple integer constant) RHS | |
2855 | if (Constant *RHSC = dyn_cast<Constant>(Op1)) { | |
2856 | if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) | |
2857 | switch (LHSI->getOpcode()) { | |
2858 | case Instruction::GetElementPtr: | |
2859 | // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null | |
2860 | if (RHSC->isNullValue() && | |
2861 | cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) | |
2862 | return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), | |
2863 | Constant::getNullValue(LHSI->getOperand(0)->getType())); | |
2864 | break; | |
2865 | case Instruction::PHI: | |
2866 | // Only fold icmp into the PHI if the phi and icmp are in the same | |
2867 | // block. If in the same block, we're encouraging jump threading. If | |
2868 | // not, we are just pessimizing the code by making an i1 phi. | |
2869 | if (LHSI->getParent() == I.getParent()) | |
2870 | if (Instruction *NV = FoldOpIntoPhi(I)) | |
2871 | return NV; | |
2872 | break; | |
2873 | case Instruction::Select: { | |
2874 | // If either operand of the select is a constant, we can fold the | |
2875 | // comparison into the select arms, which will cause one to be | |
2876 | // constant folded and the select turned into a bitwise or. | |
1a4d82fc | 2877 | Value *Op1 = nullptr, *Op2 = nullptr; |
223e47cc LB |
2878 | if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) |
2879 | Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); | |
2880 | if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) | |
2881 | Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); | |
2882 | ||
2883 | // We only want to perform this transformation if it will not lead to | |
2884 | // additional code. This is true if either both sides of the select | |
2885 | // fold to a constant (in which case the icmp is replaced with a select | |
2886 | // which will usually simplify) or this is the only user of the | |
2887 | // select (in which case we are trading a select+icmp for a simpler | |
2888 | // select+icmp). | |
2889 | if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { | |
2890 | if (!Op1) | |
2891 | Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), | |
2892 | RHSC, I.getName()); | |
2893 | if (!Op2) | |
2894 | Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), | |
2895 | RHSC, I.getName()); | |
2896 | return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); | |
2897 | } | |
2898 | break; | |
2899 | } | |
2900 | case Instruction::IntToPtr: | |
2901 | // icmp pred inttoptr(X), null -> icmp pred X, 0 | |
1a4d82fc JJ |
2902 | if (RHSC->isNullValue() && DL && |
2903 | DL->getIntPtrType(RHSC->getType()) == | |
223e47cc LB |
2904 | LHSI->getOperand(0)->getType()) |
2905 | return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), | |
2906 | Constant::getNullValue(LHSI->getOperand(0)->getType())); | |
2907 | break; | |
2908 | ||
2909 | case Instruction::Load: | |
2910 | // Try to optimize things like "A[i] > 4" to index computations. | |
2911 | if (GetElementPtrInst *GEP = | |
2912 | dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { | |
2913 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | |
2914 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | |
2915 | !cast<LoadInst>(LHSI)->isVolatile()) | |
2916 | if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) | |
2917 | return Res; | |
2918 | } | |
2919 | break; | |
2920 | } | |
2921 | } | |
2922 | ||
2923 | // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. | |
2924 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) | |
2925 | if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) | |
2926 | return NI; | |
2927 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) | |
2928 | if (Instruction *NI = FoldGEPICmp(GEP, Op0, | |
2929 | ICmpInst::getSwappedPredicate(I.getPredicate()), I)) | |
2930 | return NI; | |
2931 | ||
2932 | // Test to see if the operands of the icmp are casted versions of other | |
2933 | // values. If the ptr->ptr cast can be stripped off both arguments, we do so | |
2934 | // now. | |
2935 | if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { | |
2936 | if (Op0->getType()->isPointerTy() && | |
2937 | (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { | |
2938 | // We keep moving the cast from the left operand over to the right | |
2939 | // operand, where it can often be eliminated completely. | |
2940 | Op0 = CI->getOperand(0); | |
2941 | ||
2942 | // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast | |
2943 | // so eliminate it as well. | |
2944 | if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1)) | |
2945 | Op1 = CI2->getOperand(0); | |
2946 | ||
2947 | // If Op1 is a constant, we can fold the cast into the constant. | |
2948 | if (Op0->getType() != Op1->getType()) { | |
2949 | if (Constant *Op1C = dyn_cast<Constant>(Op1)) { | |
2950 | Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); | |
2951 | } else { | |
2952 | // Otherwise, cast the RHS right before the icmp | |
2953 | Op1 = Builder->CreateBitCast(Op1, Op0->getType()); | |
2954 | } | |
2955 | } | |
2956 | return new ICmpInst(I.getPredicate(), Op0, Op1); | |
2957 | } | |
2958 | } | |
2959 | ||
2960 | if (isa<CastInst>(Op0)) { | |
2961 | // Handle the special case of: icmp (cast bool to X), <cst> | |
2962 | // This comes up when you have code like | |
2963 | // int X = A < B; | |
2964 | // if (X) ... | |
2965 | // For generality, we handle any zero-extension of any operand comparison | |
2966 | // with a constant or another cast from the same type. | |
2967 | if (isa<Constant>(Op1) || isa<CastInst>(Op1)) | |
2968 | if (Instruction *R = visitICmpInstWithCastAndCast(I)) | |
2969 | return R; | |
2970 | } | |
2971 | ||
2972 | // Special logic for binary operators. | |
2973 | BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0); | |
2974 | BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1); | |
2975 | if (BO0 || BO1) { | |
2976 | CmpInst::Predicate Pred = I.getPredicate(); | |
2977 | bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; | |
2978 | if (BO0 && isa<OverflowingBinaryOperator>(BO0)) | |
2979 | NoOp0WrapProblem = ICmpInst::isEquality(Pred) || | |
2980 | (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || | |
2981 | (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); | |
2982 | if (BO1 && isa<OverflowingBinaryOperator>(BO1)) | |
2983 | NoOp1WrapProblem = ICmpInst::isEquality(Pred) || | |
2984 | (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || | |
2985 | (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); | |
2986 | ||
2987 | // Analyze the case when either Op0 or Op1 is an add instruction. | |
2988 | // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). | |
1a4d82fc | 2989 | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; |
223e47cc LB |
2990 | if (BO0 && BO0->getOpcode() == Instruction::Add) |
2991 | A = BO0->getOperand(0), B = BO0->getOperand(1); | |
2992 | if (BO1 && BO1->getOpcode() == Instruction::Add) | |
2993 | C = BO1->getOperand(0), D = BO1->getOperand(1); | |
2994 | ||
2995 | // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. | |
2996 | if ((A == Op1 || B == Op1) && NoOp0WrapProblem) | |
2997 | return new ICmpInst(Pred, A == Op1 ? B : A, | |
2998 | Constant::getNullValue(Op1->getType())); | |
2999 | ||
3000 | // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. | |
3001 | if ((C == Op0 || D == Op0) && NoOp1WrapProblem) | |
3002 | return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), | |
3003 | C == Op0 ? D : C); | |
3004 | ||
3005 | // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. | |
3006 | if (A && C && (A == C || A == D || B == C || B == D) && | |
3007 | NoOp0WrapProblem && NoOp1WrapProblem && | |
3008 | // Try not to increase register pressure. | |
3009 | BO0->hasOneUse() && BO1->hasOneUse()) { | |
3010 | // Determine Y and Z in the form icmp (X+Y), (X+Z). | |
970d7e83 LB |
3011 | Value *Y, *Z; |
3012 | if (A == C) { | |
3013 | // C + B == C + D -> B == D | |
3014 | Y = B; | |
3015 | Z = D; | |
3016 | } else if (A == D) { | |
3017 | // D + B == C + D -> B == C | |
3018 | Y = B; | |
3019 | Z = C; | |
3020 | } else if (B == C) { | |
3021 | // A + C == C + D -> A == D | |
3022 | Y = A; | |
3023 | Z = D; | |
3024 | } else { | |
3025 | assert(B == D); | |
3026 | // A + D == C + D -> A == C | |
3027 | Y = A; | |
3028 | Z = C; | |
3029 | } | |
223e47cc LB |
3030 | return new ICmpInst(Pred, Y, Z); |
3031 | } | |
3032 | ||
1a4d82fc JJ |
3033 | // icmp slt (X + -1), Y -> icmp sle X, Y |
3034 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && | |
3035 | match(B, m_AllOnes())) | |
3036 | return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); | |
3037 | ||
3038 | // icmp sge (X + -1), Y -> icmp sgt X, Y | |
3039 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && | |
3040 | match(B, m_AllOnes())) | |
3041 | return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); | |
3042 | ||
3043 | // icmp sle (X + 1), Y -> icmp slt X, Y | |
3044 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && | |
3045 | match(B, m_One())) | |
3046 | return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); | |
3047 | ||
3048 | // icmp sgt (X + 1), Y -> icmp sge X, Y | |
3049 | if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && | |
3050 | match(B, m_One())) | |
3051 | return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); | |
3052 | ||
3053 | // if C1 has greater magnitude than C2: | |
3054 | // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y | |
3055 | // s.t. C3 = C1 - C2 | |
3056 | // | |
3057 | // if C2 has greater magnitude than C1: | |
3058 | // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) | |
3059 | // s.t. C3 = C2 - C1 | |
3060 | if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && | |
3061 | (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) | |
3062 | if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) | |
3063 | if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { | |
3064 | const APInt &AP1 = C1->getValue(); | |
3065 | const APInt &AP2 = C2->getValue(); | |
3066 | if (AP1.isNegative() == AP2.isNegative()) { | |
3067 | APInt AP1Abs = C1->getValue().abs(); | |
3068 | APInt AP2Abs = C2->getValue().abs(); | |
3069 | if (AP1Abs.uge(AP2Abs)) { | |
3070 | ConstantInt *C3 = Builder->getInt(AP1 - AP2); | |
3071 | Value *NewAdd = Builder->CreateNSWAdd(A, C3); | |
3072 | return new ICmpInst(Pred, NewAdd, C); | |
3073 | } else { | |
3074 | ConstantInt *C3 = Builder->getInt(AP2 - AP1); | |
3075 | Value *NewAdd = Builder->CreateNSWAdd(C, C3); | |
3076 | return new ICmpInst(Pred, A, NewAdd); | |
3077 | } | |
3078 | } | |
3079 | } | |
3080 | ||
3081 | ||
223e47cc LB |
3082 | // Analyze the case when either Op0 or Op1 is a sub instruction. |
3083 | // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). | |
1a4d82fc | 3084 | A = nullptr; B = nullptr; C = nullptr; D = nullptr; |
223e47cc LB |
3085 | if (BO0 && BO0->getOpcode() == Instruction::Sub) |
3086 | A = BO0->getOperand(0), B = BO0->getOperand(1); | |
3087 | if (BO1 && BO1->getOpcode() == Instruction::Sub) | |
3088 | C = BO1->getOperand(0), D = BO1->getOperand(1); | |
3089 | ||
3090 | // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. | |
3091 | if (A == Op1 && NoOp0WrapProblem) | |
3092 | return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); | |
3093 | ||
3094 | // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. | |
3095 | if (C == Op0 && NoOp1WrapProblem) | |
3096 | return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); | |
3097 | ||
3098 | // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. | |
3099 | if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && | |
3100 | // Try not to increase register pressure. | |
3101 | BO0->hasOneUse() && BO1->hasOneUse()) | |
3102 | return new ICmpInst(Pred, A, C); | |
3103 | ||
3104 | // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. | |
3105 | if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && | |
3106 | // Try not to increase register pressure. | |
3107 | BO0->hasOneUse() && BO1->hasOneUse()) | |
3108 | return new ICmpInst(Pred, D, B); | |
3109 | ||
1a4d82fc JJ |
3110 | // icmp (0-X) < cst --> x > -cst |
3111 | if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { | |
3112 | Value *X; | |
3113 | if (match(BO0, m_Neg(m_Value(X)))) | |
3114 | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) | |
3115 | if (!RHSC->isMinValue(/*isSigned=*/true)) | |
3116 | return new ICmpInst(I.getSwappedPredicate(), X, | |
3117 | ConstantExpr::getNeg(RHSC)); | |
3118 | } | |
3119 | ||
3120 | BinaryOperator *SRem = nullptr; | |
223e47cc LB |
3121 | // icmp (srem X, Y), Y |
3122 | if (BO0 && BO0->getOpcode() == Instruction::SRem && | |
3123 | Op1 == BO0->getOperand(1)) | |
3124 | SRem = BO0; | |
3125 | // icmp Y, (srem X, Y) | |
3126 | else if (BO1 && BO1->getOpcode() == Instruction::SRem && | |
3127 | Op0 == BO1->getOperand(1)) | |
3128 | SRem = BO1; | |
3129 | if (SRem) { | |
3130 | // We don't check hasOneUse to avoid increasing register pressure because | |
3131 | // the value we use is the same value this instruction was already using. | |
3132 | switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) { | |
3133 | default: break; | |
3134 | case ICmpInst::ICMP_EQ: | |
3135 | return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); | |
3136 | case ICmpInst::ICMP_NE: | |
3137 | return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); | |
3138 | case ICmpInst::ICMP_SGT: | |
3139 | case ICmpInst::ICMP_SGE: | |
3140 | return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), | |
3141 | Constant::getAllOnesValue(SRem->getType())); | |
3142 | case ICmpInst::ICMP_SLT: | |
3143 | case ICmpInst::ICMP_SLE: | |
3144 | return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), | |
3145 | Constant::getNullValue(SRem->getType())); | |
3146 | } | |
3147 | } | |
3148 | ||
3149 | if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && | |
3150 | BO0->hasOneUse() && BO1->hasOneUse() && | |
3151 | BO0->getOperand(1) == BO1->getOperand(1)) { | |
3152 | switch (BO0->getOpcode()) { | |
3153 | default: break; | |
3154 | case Instruction::Add: | |
3155 | case Instruction::Sub: | |
3156 | case Instruction::Xor: | |
3157 | if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b | |
3158 | return new ICmpInst(I.getPredicate(), BO0->getOperand(0), | |
3159 | BO1->getOperand(0)); | |
3160 | // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b | |
3161 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { | |
3162 | if (CI->getValue().isSignBit()) { | |
3163 | ICmpInst::Predicate Pred = I.isSigned() | |
3164 | ? I.getUnsignedPredicate() | |
3165 | : I.getSignedPredicate(); | |
3166 | return new ICmpInst(Pred, BO0->getOperand(0), | |
3167 | BO1->getOperand(0)); | |
3168 | } | |
3169 | ||
3170 | if (CI->isMaxValue(true)) { | |
3171 | ICmpInst::Predicate Pred = I.isSigned() | |
3172 | ? I.getUnsignedPredicate() | |
3173 | : I.getSignedPredicate(); | |
3174 | Pred = I.getSwappedPredicate(Pred); | |
3175 | return new ICmpInst(Pred, BO0->getOperand(0), | |
3176 | BO1->getOperand(0)); | |
3177 | } | |
3178 | } | |
3179 | break; | |
3180 | case Instruction::Mul: | |
3181 | if (!I.isEquality()) | |
3182 | break; | |
3183 | ||
3184 | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) { | |
3185 | // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask | |
3186 | // Mask = -1 >> count-trailing-zeros(Cst). | |
3187 | if (!CI->isZero() && !CI->isOne()) { | |
3188 | const APInt &AP = CI->getValue(); | |
3189 | ConstantInt *Mask = ConstantInt::get(I.getContext(), | |
3190 | APInt::getLowBitsSet(AP.getBitWidth(), | |
3191 | AP.getBitWidth() - | |
3192 | AP.countTrailingZeros())); | |
3193 | Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); | |
3194 | Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); | |
3195 | return new ICmpInst(I.getPredicate(), And1, And2); | |
3196 | } | |
3197 | } | |
3198 | break; | |
3199 | case Instruction::UDiv: | |
3200 | case Instruction::LShr: | |
3201 | if (I.isSigned()) | |
3202 | break; | |
3203 | // fall-through | |
3204 | case Instruction::SDiv: | |
3205 | case Instruction::AShr: | |
3206 | if (!BO0->isExact() || !BO1->isExact()) | |
3207 | break; | |
3208 | return new ICmpInst(I.getPredicate(), BO0->getOperand(0), | |
3209 | BO1->getOperand(0)); | |
3210 | case Instruction::Shl: { | |
3211 | bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); | |
3212 | bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); | |
3213 | if (!NUW && !NSW) | |
3214 | break; | |
3215 | if (!NSW && I.isSigned()) | |
3216 | break; | |
3217 | return new ICmpInst(I.getPredicate(), BO0->getOperand(0), | |
3218 | BO1->getOperand(0)); | |
3219 | } | |
3220 | } | |
3221 | } | |
3222 | } | |
3223 | ||
3224 | { Value *A, *B; | |
1a4d82fc JJ |
3225 | // Transform (A & ~B) == 0 --> (A & B) != 0 |
3226 | // and (A & ~B) != 0 --> (A & B) == 0 | |
3227 | // if A is a power of 2. | |
3228 | if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && | |
3229 | match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A, false, | |
3230 | 0, AT, &I, DT) && | |
3231 | I.isEquality()) | |
3232 | return new ICmpInst(I.getInversePredicate(), | |
3233 | Builder->CreateAnd(A, B), | |
3234 | Op1); | |
3235 | ||
223e47cc LB |
3236 | // ~x < ~y --> y < x |
3237 | // ~x < cst --> ~cst < x | |
3238 | if (match(Op0, m_Not(m_Value(A)))) { | |
3239 | if (match(Op1, m_Not(m_Value(B)))) | |
3240 | return new ICmpInst(I.getPredicate(), B, A); | |
3241 | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) | |
3242 | return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); | |
3243 | } | |
3244 | ||
3245 | // (a+b) <u a --> llvm.uadd.with.overflow. | |
3246 | // (a+b) <u b --> llvm.uadd.with.overflow. | |
3247 | if (I.getPredicate() == ICmpInst::ICMP_ULT && | |
3248 | match(Op0, m_Add(m_Value(A), m_Value(B))) && | |
3249 | (Op1 == A || Op1 == B)) | |
3250 | if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) | |
3251 | return R; | |
3252 | ||
3253 | // a >u (a+b) --> llvm.uadd.with.overflow. | |
3254 | // b >u (a+b) --> llvm.uadd.with.overflow. | |
3255 | if (I.getPredicate() == ICmpInst::ICMP_UGT && | |
3256 | match(Op1, m_Add(m_Value(A), m_Value(B))) && | |
3257 | (Op0 == A || Op0 == B)) | |
3258 | if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) | |
3259 | return R; | |
1a4d82fc JJ |
3260 | |
3261 | // (zext a) * (zext b) --> llvm.umul.with.overflow. | |
3262 | if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { | |
3263 | if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this)) | |
3264 | return R; | |
3265 | } | |
3266 | if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { | |
3267 | if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this)) | |
3268 | return R; | |
3269 | } | |
223e47cc LB |
3270 | } |
3271 | ||
3272 | if (I.isEquality()) { | |
3273 | Value *A, *B, *C, *D; | |
3274 | ||
3275 | if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { | |
3276 | if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 | |
3277 | Value *OtherVal = A == Op1 ? B : A; | |
3278 | return new ICmpInst(I.getPredicate(), OtherVal, | |
3279 | Constant::getNullValue(A->getType())); | |
3280 | } | |
3281 | ||
3282 | if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { | |
3283 | // A^c1 == C^c2 --> A == C^(c1^c2) | |
3284 | ConstantInt *C1, *C2; | |
3285 | if (match(B, m_ConstantInt(C1)) && | |
3286 | match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { | |
1a4d82fc | 3287 | Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); |
223e47cc LB |
3288 | Value *Xor = Builder->CreateXor(C, NC); |
3289 | return new ICmpInst(I.getPredicate(), A, Xor); | |
3290 | } | |
3291 | ||
3292 | // A^B == A^D -> B == D | |
3293 | if (A == C) return new ICmpInst(I.getPredicate(), B, D); | |
3294 | if (A == D) return new ICmpInst(I.getPredicate(), B, C); | |
3295 | if (B == C) return new ICmpInst(I.getPredicate(), A, D); | |
3296 | if (B == D) return new ICmpInst(I.getPredicate(), A, C); | |
3297 | } | |
3298 | } | |
3299 | ||
3300 | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && | |
3301 | (A == Op0 || B == Op0)) { | |
3302 | // A == (A^B) -> B == 0 | |
3303 | Value *OtherVal = A == Op0 ? B : A; | |
3304 | return new ICmpInst(I.getPredicate(), OtherVal, | |
3305 | Constant::getNullValue(A->getType())); | |
3306 | } | |
3307 | ||
3308 | // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 | |
3309 | if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && | |
3310 | match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { | |
1a4d82fc | 3311 | Value *X = nullptr, *Y = nullptr, *Z = nullptr; |
223e47cc LB |
3312 | |
3313 | if (A == C) { | |
3314 | X = B; Y = D; Z = A; | |
3315 | } else if (A == D) { | |
3316 | X = B; Y = C; Z = A; | |
3317 | } else if (B == C) { | |
3318 | X = A; Y = D; Z = B; | |
3319 | } else if (B == D) { | |
3320 | X = A; Y = C; Z = B; | |
3321 | } | |
3322 | ||
3323 | if (X) { // Build (X^Y) & Z | |
3324 | Op1 = Builder->CreateXor(X, Y); | |
3325 | Op1 = Builder->CreateAnd(Op1, Z); | |
3326 | I.setOperand(0, Op1); | |
3327 | I.setOperand(1, Constant::getNullValue(Op1->getType())); | |
3328 | return &I; | |
3329 | } | |
3330 | } | |
3331 | ||
3332 | // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B) | |
3333 | // and (B & (1<<X)-1) == (zext A) --> A == (trunc B) | |
3334 | ConstantInt *Cst1; | |
3335 | if ((Op0->hasOneUse() && | |
3336 | match(Op0, m_ZExt(m_Value(A))) && | |
3337 | match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || | |
3338 | (Op1->hasOneUse() && | |
3339 | match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && | |
3340 | match(Op1, m_ZExt(m_Value(A))))) { | |
3341 | APInt Pow2 = Cst1->getValue() + 1; | |
3342 | if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && | |
3343 | Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) | |
3344 | return new ICmpInst(I.getPredicate(), A, | |
3345 | Builder->CreateTrunc(B, A->getType())); | |
3346 | } | |
3347 | ||
1a4d82fc JJ |
3348 | // (A >> C) == (B >> C) --> (A^B) u< (1 << C) |
3349 | // For lshr and ashr pairs. | |
3350 | if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && | |
3351 | match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || | |
3352 | (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && | |
3353 | match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { | |
3354 | unsigned TypeBits = Cst1->getBitWidth(); | |
3355 | unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); | |
3356 | if (ShAmt < TypeBits && ShAmt != 0) { | |
3357 | ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE | |
3358 | ? ICmpInst::ICMP_UGE | |
3359 | : ICmpInst::ICMP_ULT; | |
3360 | Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); | |
3361 | APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); | |
3362 | return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); | |
3363 | } | |
3364 | } | |
3365 | ||
223e47cc LB |
3366 | // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to |
3367 | // "icmp (and X, mask), cst" | |
3368 | uint64_t ShAmt = 0; | |
3369 | if (Op0->hasOneUse() && | |
3370 | match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), | |
3371 | m_ConstantInt(ShAmt))))) && | |
3372 | match(Op1, m_ConstantInt(Cst1)) && | |
3373 | // Only do this when A has multiple uses. This is most important to do | |
3374 | // when it exposes other optimizations. | |
3375 | !A->hasOneUse()) { | |
3376 | unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); | |
3377 | ||
3378 | if (ShAmt < ASize) { | |
3379 | APInt MaskV = | |
3380 | APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); | |
3381 | MaskV <<= ShAmt; | |
3382 | ||
3383 | APInt CmpV = Cst1->getValue().zext(ASize); | |
3384 | CmpV <<= ShAmt; | |
3385 | ||
3386 | Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); | |
3387 | return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); | |
3388 | } | |
3389 | } | |
3390 | } | |
3391 | ||
3392 | { | |
3393 | Value *X; ConstantInt *Cst; | |
3394 | // icmp X+Cst, X | |
3395 | if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) | |
1a4d82fc | 3396 | return FoldICmpAddOpCst(I, X, Cst, I.getPredicate()); |
223e47cc LB |
3397 | |
3398 | // icmp X, X+Cst | |
3399 | if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) | |
1a4d82fc | 3400 | return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); |
223e47cc | 3401 | } |
1a4d82fc | 3402 | return Changed ? &I : nullptr; |
223e47cc LB |
3403 | } |
3404 | ||
223e47cc LB |
3405 | /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. |
3406 | /// | |
3407 | Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, | |
3408 | Instruction *LHSI, | |
3409 | Constant *RHSC) { | |
1a4d82fc | 3410 | if (!isa<ConstantFP>(RHSC)) return nullptr; |
223e47cc LB |
3411 | const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); |
3412 | ||
3413 | // Get the width of the mantissa. We don't want to hack on conversions that | |
3414 | // might lose information from the integer, e.g. "i64 -> float" | |
3415 | int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); | |
1a4d82fc | 3416 | if (MantissaWidth == -1) return nullptr; // Unknown. |
223e47cc LB |
3417 | |
3418 | // Check to see that the input is converted from an integer type that is small | |
3419 | // enough that preserves all bits. TODO: check here for "known" sign bits. | |
3420 | // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. | |
3421 | unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); | |
3422 | ||
3423 | // If this is a uitofp instruction, we need an extra bit to hold the sign. | |
3424 | bool LHSUnsigned = isa<UIToFPInst>(LHSI); | |
3425 | if (LHSUnsigned) | |
3426 | ++InputSize; | |
3427 | ||
3428 | // If the conversion would lose info, don't hack on this. | |
3429 | if ((int)InputSize > MantissaWidth) | |
1a4d82fc | 3430 | return nullptr; |
223e47cc LB |
3431 | |
3432 | // Otherwise, we can potentially simplify the comparison. We know that it | |
3433 | // will always come through as an integer value and we know the constant is | |
3434 | // not a NAN (it would have been previously simplified). | |
3435 | assert(!RHS.isNaN() && "NaN comparison not already folded!"); | |
3436 | ||
3437 | ICmpInst::Predicate Pred; | |
3438 | switch (I.getPredicate()) { | |
3439 | default: llvm_unreachable("Unexpected predicate!"); | |
3440 | case FCmpInst::FCMP_UEQ: | |
3441 | case FCmpInst::FCMP_OEQ: | |
3442 | Pred = ICmpInst::ICMP_EQ; | |
3443 | break; | |
3444 | case FCmpInst::FCMP_UGT: | |
3445 | case FCmpInst::FCMP_OGT: | |
3446 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; | |
3447 | break; | |
3448 | case FCmpInst::FCMP_UGE: | |
3449 | case FCmpInst::FCMP_OGE: | |
3450 | Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; | |
3451 | break; | |
3452 | case FCmpInst::FCMP_ULT: | |
3453 | case FCmpInst::FCMP_OLT: | |
3454 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; | |
3455 | break; | |
3456 | case FCmpInst::FCMP_ULE: | |
3457 | case FCmpInst::FCMP_OLE: | |
3458 | Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; | |
3459 | break; | |
3460 | case FCmpInst::FCMP_UNE: | |
3461 | case FCmpInst::FCMP_ONE: | |
3462 | Pred = ICmpInst::ICMP_NE; | |
3463 | break; | |
3464 | case FCmpInst::FCMP_ORD: | |
1a4d82fc | 3465 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc | 3466 | case FCmpInst::FCMP_UNO: |
1a4d82fc | 3467 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3468 | } |
3469 | ||
3470 | IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); | |
3471 | ||
3472 | // Now we know that the APFloat is a normal number, zero or inf. | |
3473 | ||
3474 | // See if the FP constant is too large for the integer. For example, | |
3475 | // comparing an i8 to 300.0. | |
3476 | unsigned IntWidth = IntTy->getScalarSizeInBits(); | |
3477 | ||
3478 | if (!LHSUnsigned) { | |
3479 | // If the RHS value is > SignedMax, fold the comparison. This handles +INF | |
3480 | // and large values. | |
1a4d82fc | 3481 | APFloat SMax(RHS.getSemantics()); |
223e47cc LB |
3482 | SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, |
3483 | APFloat::rmNearestTiesToEven); | |
3484 | if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 | |
3485 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || | |
3486 | Pred == ICmpInst::ICMP_SLE) | |
1a4d82fc JJ |
3487 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3488 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3489 | } |
3490 | } else { | |
3491 | // If the RHS value is > UnsignedMax, fold the comparison. This handles | |
3492 | // +INF and large values. | |
1a4d82fc | 3493 | APFloat UMax(RHS.getSemantics()); |
223e47cc LB |
3494 | UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, |
3495 | APFloat::rmNearestTiesToEven); | |
3496 | if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 | |
3497 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || | |
3498 | Pred == ICmpInst::ICMP_ULE) | |
1a4d82fc JJ |
3499 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3500 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3501 | } |
3502 | } | |
3503 | ||
3504 | if (!LHSUnsigned) { | |
3505 | // See if the RHS value is < SignedMin. | |
1a4d82fc | 3506 | APFloat SMin(RHS.getSemantics()); |
223e47cc LB |
3507 | SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, |
3508 | APFloat::rmNearestTiesToEven); | |
3509 | if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 | |
3510 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || | |
3511 | Pred == ICmpInst::ICMP_SGE) | |
1a4d82fc JJ |
3512 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3513 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3514 | } |
3515 | } else { | |
3516 | // See if the RHS value is < UnsignedMin. | |
1a4d82fc | 3517 | APFloat SMin(RHS.getSemantics()); |
223e47cc LB |
3518 | SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, |
3519 | APFloat::rmNearestTiesToEven); | |
3520 | if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 | |
3521 | if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || | |
3522 | Pred == ICmpInst::ICMP_UGE) | |
1a4d82fc JJ |
3523 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
3524 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
223e47cc LB |
3525 | } |
3526 | } | |
3527 | ||
3528 | // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or | |
3529 | // [0, UMAX], but it may still be fractional. See if it is fractional by | |
3530 | // casting the FP value to the integer value and back, checking for equality. | |
3531 | // Don't do this for zero, because -0.0 is not fractional. | |
3532 | Constant *RHSInt = LHSUnsigned | |
3533 | ? ConstantExpr::getFPToUI(RHSC, IntTy) | |
3534 | : ConstantExpr::getFPToSI(RHSC, IntTy); | |
3535 | if (!RHS.isZero()) { | |
3536 | bool Equal = LHSUnsigned | |
3537 | ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC | |
3538 | : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; | |
3539 | if (!Equal) { | |
3540 | // If we had a comparison against a fractional value, we have to adjust | |
3541 | // the compare predicate and sometimes the value. RHSC is rounded towards | |
3542 | // zero at this point. | |
3543 | switch (Pred) { | |
3544 | default: llvm_unreachable("Unexpected integer comparison!"); | |
3545 | case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true | |
1a4d82fc | 3546 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc | 3547 | case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false |
1a4d82fc | 3548 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3549 | case ICmpInst::ICMP_ULE: |
3550 | // (float)int <= 4.4 --> int <= 4 | |
3551 | // (float)int <= -4.4 --> false | |
3552 | if (RHS.isNegative()) | |
1a4d82fc | 3553 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3554 | break; |
3555 | case ICmpInst::ICMP_SLE: | |
3556 | // (float)int <= 4.4 --> int <= 4 | |
3557 | // (float)int <= -4.4 --> int < -4 | |
3558 | if (RHS.isNegative()) | |
3559 | Pred = ICmpInst::ICMP_SLT; | |
3560 | break; | |
3561 | case ICmpInst::ICMP_ULT: | |
3562 | // (float)int < -4.4 --> false | |
3563 | // (float)int < 4.4 --> int <= 4 | |
3564 | if (RHS.isNegative()) | |
1a4d82fc | 3565 | return ReplaceInstUsesWith(I, Builder->getFalse()); |
223e47cc LB |
3566 | Pred = ICmpInst::ICMP_ULE; |
3567 | break; | |
3568 | case ICmpInst::ICMP_SLT: | |
3569 | // (float)int < -4.4 --> int < -4 | |
3570 | // (float)int < 4.4 --> int <= 4 | |
3571 | if (!RHS.isNegative()) | |
3572 | Pred = ICmpInst::ICMP_SLE; | |
3573 | break; | |
3574 | case ICmpInst::ICMP_UGT: | |
3575 | // (float)int > 4.4 --> int > 4 | |
3576 | // (float)int > -4.4 --> true | |
3577 | if (RHS.isNegative()) | |
1a4d82fc | 3578 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc LB |
3579 | break; |
3580 | case ICmpInst::ICMP_SGT: | |
3581 | // (float)int > 4.4 --> int > 4 | |
3582 | // (float)int > -4.4 --> int >= -4 | |
3583 | if (RHS.isNegative()) | |
3584 | Pred = ICmpInst::ICMP_SGE; | |
3585 | break; | |
3586 | case ICmpInst::ICMP_UGE: | |
3587 | // (float)int >= -4.4 --> true | |
3588 | // (float)int >= 4.4 --> int > 4 | |
3589 | if (RHS.isNegative()) | |
1a4d82fc | 3590 | return ReplaceInstUsesWith(I, Builder->getTrue()); |
223e47cc LB |
3591 | Pred = ICmpInst::ICMP_UGT; |
3592 | break; | |
3593 | case ICmpInst::ICMP_SGE: | |
3594 | // (float)int >= -4.4 --> int >= -4 | |
3595 | // (float)int >= 4.4 --> int > 4 | |
3596 | if (!RHS.isNegative()) | |
3597 | Pred = ICmpInst::ICMP_SGT; | |
3598 | break; | |
3599 | } | |
3600 | } | |
3601 | } | |
3602 | ||
3603 | // Lower this FP comparison into an appropriate integer version of the | |
3604 | // comparison. | |
3605 | return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); | |
3606 | } | |
3607 | ||
3608 | Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { | |
3609 | bool Changed = false; | |
3610 | ||
3611 | /// Orders the operands of the compare so that they are listed from most | |
3612 | /// complex to least complex. This puts constants before unary operators, | |
3613 | /// before binary operators. | |
3614 | if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { | |
3615 | I.swapOperands(); | |
3616 | Changed = true; | |
3617 | } | |
3618 | ||
3619 | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | |
3620 | ||
1a4d82fc | 3621 | if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AT)) |
223e47cc LB |
3622 | return ReplaceInstUsesWith(I, V); |
3623 | ||
3624 | // Simplify 'fcmp pred X, X' | |
3625 | if (Op0 == Op1) { | |
3626 | switch (I.getPredicate()) { | |
3627 | default: llvm_unreachable("Unknown predicate!"); | |
3628 | case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) | |
3629 | case FCmpInst::FCMP_ULT: // True if unordered or less than | |
3630 | case FCmpInst::FCMP_UGT: // True if unordered or greater than | |
3631 | case FCmpInst::FCMP_UNE: // True if unordered or not equal | |
3632 | // Canonicalize these to be 'fcmp uno %X, 0.0'. | |
3633 | I.setPredicate(FCmpInst::FCMP_UNO); | |
3634 | I.setOperand(1, Constant::getNullValue(Op0->getType())); | |
3635 | return &I; | |
3636 | ||
3637 | case FCmpInst::FCMP_ORD: // True if ordered (no nans) | |
3638 | case FCmpInst::FCMP_OEQ: // True if ordered and equal | |
3639 | case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal | |
3640 | case FCmpInst::FCMP_OLE: // True if ordered and less than or equal | |
3641 | // Canonicalize these to be 'fcmp ord %X, 0.0'. | |
3642 | I.setPredicate(FCmpInst::FCMP_ORD); | |
3643 | I.setOperand(1, Constant::getNullValue(Op0->getType())); | |
3644 | return &I; | |
3645 | } | |
3646 | } | |
3647 | ||
3648 | // Handle fcmp with constant RHS | |
3649 | if (Constant *RHSC = dyn_cast<Constant>(Op1)) { | |
3650 | if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) | |
3651 | switch (LHSI->getOpcode()) { | |
3652 | case Instruction::FPExt: { | |
3653 | // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless | |
3654 | FPExtInst *LHSExt = cast<FPExtInst>(LHSI); | |
3655 | ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); | |
3656 | if (!RHSF) | |
3657 | break; | |
3658 | ||
223e47cc LB |
3659 | const fltSemantics *Sem; |
3660 | // FIXME: This shouldn't be here. | |
3661 | if (LHSExt->getSrcTy()->isHalfTy()) | |
3662 | Sem = &APFloat::IEEEhalf; | |
3663 | else if (LHSExt->getSrcTy()->isFloatTy()) | |
3664 | Sem = &APFloat::IEEEsingle; | |
3665 | else if (LHSExt->getSrcTy()->isDoubleTy()) | |
3666 | Sem = &APFloat::IEEEdouble; | |
3667 | else if (LHSExt->getSrcTy()->isFP128Ty()) | |
3668 | Sem = &APFloat::IEEEquad; | |
3669 | else if (LHSExt->getSrcTy()->isX86_FP80Ty()) | |
3670 | Sem = &APFloat::x87DoubleExtended; | |
970d7e83 LB |
3671 | else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) |
3672 | Sem = &APFloat::PPCDoubleDouble; | |
223e47cc LB |
3673 | else |
3674 | break; | |
3675 | ||
3676 | bool Lossy; | |
3677 | APFloat F = RHSF->getValueAPF(); | |
3678 | F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); | |
3679 | ||
3680 | // Avoid lossy conversions and denormals. Zero is a special case | |
3681 | // that's OK to convert. | |
3682 | APFloat Fabs = F; | |
3683 | Fabs.clearSign(); | |
3684 | if (!Lossy && | |
3685 | ((Fabs.compare(APFloat::getSmallestNormalized(*Sem)) != | |
3686 | APFloat::cmpLessThan) || Fabs.isZero())) | |
3687 | ||
3688 | return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), | |
3689 | ConstantFP::get(RHSC->getContext(), F)); | |
3690 | break; | |
3691 | } | |
3692 | case Instruction::PHI: | |
3693 | // Only fold fcmp into the PHI if the phi and fcmp are in the same | |
3694 | // block. If in the same block, we're encouraging jump threading. If | |
3695 | // not, we are just pessimizing the code by making an i1 phi. | |
3696 | if (LHSI->getParent() == I.getParent()) | |
3697 | if (Instruction *NV = FoldOpIntoPhi(I)) | |
3698 | return NV; | |
3699 | break; | |
3700 | case Instruction::SIToFP: | |
3701 | case Instruction::UIToFP: | |
3702 | if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) | |
3703 | return NV; | |
3704 | break; | |
223e47cc LB |
3705 | case Instruction::FSub: { |
3706 | // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C | |
3707 | Value *Op; | |
3708 | if (match(LHSI, m_FNeg(m_Value(Op)))) | |
3709 | return new FCmpInst(I.getSwappedPredicate(), Op, | |
3710 | ConstantExpr::getFNeg(RHSC)); | |
3711 | break; | |
3712 | } | |
3713 | case Instruction::Load: | |
3714 | if (GetElementPtrInst *GEP = | |
3715 | dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { | |
3716 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) | |
3717 | if (GV->isConstant() && GV->hasDefinitiveInitializer() && | |
3718 | !cast<LoadInst>(LHSI)->isVolatile()) | |
3719 | if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) | |
3720 | return Res; | |
3721 | } | |
3722 | break; | |
3723 | case Instruction::Call: { | |
3724 | CallInst *CI = cast<CallInst>(LHSI); | |
3725 | LibFunc::Func Func; | |
3726 | // Various optimization for fabs compared with zero. | |
3727 | if (RHSC->isNullValue() && CI->getCalledFunction() && | |
3728 | TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && | |
3729 | TLI->has(Func)) { | |
3730 | if (Func == LibFunc::fabs || Func == LibFunc::fabsf || | |
3731 | Func == LibFunc::fabsl) { | |
3732 | switch (I.getPredicate()) { | |
3733 | default: break; | |
3734 | // fabs(x) < 0 --> false | |
3735 | case FCmpInst::FCMP_OLT: | |
3736 | return ReplaceInstUsesWith(I, Builder->getFalse()); | |
3737 | // fabs(x) > 0 --> x != 0 | |
3738 | case FCmpInst::FCMP_OGT: | |
3739 | return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), | |
3740 | RHSC); | |
3741 | // fabs(x) <= 0 --> x == 0 | |
3742 | case FCmpInst::FCMP_OLE: | |
3743 | return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), | |
3744 | RHSC); | |
3745 | // fabs(x) >= 0 --> !isnan(x) | |
3746 | case FCmpInst::FCMP_OGE: | |
3747 | return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), | |
3748 | RHSC); | |
3749 | // fabs(x) == 0 --> x == 0 | |
3750 | // fabs(x) != 0 --> x != 0 | |
3751 | case FCmpInst::FCMP_OEQ: | |
3752 | case FCmpInst::FCMP_UEQ: | |
3753 | case FCmpInst::FCMP_ONE: | |
3754 | case FCmpInst::FCMP_UNE: | |
3755 | return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), | |
3756 | RHSC); | |
3757 | } | |
3758 | } | |
3759 | } | |
3760 | } | |
3761 | } | |
3762 | } | |
3763 | ||
3764 | // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y | |
3765 | Value *X, *Y; | |
3766 | if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) | |
3767 | return new FCmpInst(I.getSwappedPredicate(), X, Y); | |
3768 | ||
3769 | // fcmp (fpext x), (fpext y) -> fcmp x, y | |
3770 | if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) | |
3771 | if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) | |
3772 | if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) | |
3773 | return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), | |
3774 | RHSExt->getOperand(0)); | |
3775 | ||
1a4d82fc | 3776 | return Changed ? &I : nullptr; |
223e47cc | 3777 | } |