]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- APInt.cpp - Implement APInt class ---------------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements a class to represent arbitrary precision integer | |
11 | // constant values and provide a variety of arithmetic operations on them. | |
12 | // | |
13 | //===----------------------------------------------------------------------===// | |
14 | ||
223e47cc LB |
15 | #include "llvm/ADT/APInt.h" |
16 | #include "llvm/ADT/FoldingSet.h" | |
17 | #include "llvm/ADT/Hashing.h" | |
18 | #include "llvm/ADT/SmallString.h" | |
19 | #include "llvm/ADT/StringRef.h" | |
20 | #include "llvm/Support/Debug.h" | |
21 | #include "llvm/Support/ErrorHandling.h" | |
22 | #include "llvm/Support/MathExtras.h" | |
23 | #include "llvm/Support/raw_ostream.h" | |
24 | #include <cmath> | |
223e47cc | 25 | #include <cstdlib> |
970d7e83 LB |
26 | #include <cstring> |
27 | #include <limits> | |
223e47cc LB |
28 | using namespace llvm; |
29 | ||
1a4d82fc JJ |
30 | #define DEBUG_TYPE "apint" |
31 | ||
223e47cc LB |
32 | /// A utility function for allocating memory, checking for allocation failures, |
33 | /// and ensuring the contents are zeroed. | |
34 | inline static uint64_t* getClearedMemory(unsigned numWords) { | |
35 | uint64_t * result = new uint64_t[numWords]; | |
36 | assert(result && "APInt memory allocation fails!"); | |
37 | memset(result, 0, numWords * sizeof(uint64_t)); | |
38 | return result; | |
39 | } | |
40 | ||
41 | /// A utility function for allocating memory and checking for allocation | |
42 | /// failure. The content is not zeroed. | |
43 | inline static uint64_t* getMemory(unsigned numWords) { | |
44 | uint64_t * result = new uint64_t[numWords]; | |
45 | assert(result && "APInt memory allocation fails!"); | |
46 | return result; | |
47 | } | |
48 | ||
49 | /// A utility function that converts a character to a digit. | |
50 | inline static unsigned getDigit(char cdigit, uint8_t radix) { | |
51 | unsigned r; | |
52 | ||
53 | if (radix == 16 || radix == 36) { | |
54 | r = cdigit - '0'; | |
55 | if (r <= 9) | |
56 | return r; | |
57 | ||
58 | r = cdigit - 'A'; | |
59 | if (r <= radix - 11U) | |
60 | return r + 10; | |
61 | ||
62 | r = cdigit - 'a'; | |
63 | if (r <= radix - 11U) | |
64 | return r + 10; | |
65 | ||
66 | radix = 10; | |
67 | } | |
68 | ||
69 | r = cdigit - '0'; | |
70 | if (r < radix) | |
71 | return r; | |
72 | ||
73 | return -1U; | |
74 | } | |
75 | ||
76 | ||
77 | void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { | |
78 | pVal = getClearedMemory(getNumWords()); | |
79 | pVal[0] = val; | |
80 | if (isSigned && int64_t(val) < 0) | |
81 | for (unsigned i = 1; i < getNumWords(); ++i) | |
82 | pVal[i] = -1ULL; | |
83 | } | |
84 | ||
85 | void APInt::initSlowCase(const APInt& that) { | |
86 | pVal = getMemory(getNumWords()); | |
87 | memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); | |
88 | } | |
89 | ||
90 | void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { | |
91 | assert(BitWidth && "Bitwidth too small"); | |
92 | assert(bigVal.data() && "Null pointer detected!"); | |
93 | if (isSingleWord()) | |
94 | VAL = bigVal[0]; | |
95 | else { | |
96 | // Get memory, cleared to 0 | |
97 | pVal = getClearedMemory(getNumWords()); | |
98 | // Calculate the number of words to copy | |
99 | unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); | |
100 | // Copy the words from bigVal to pVal | |
101 | memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE); | |
102 | } | |
103 | // Make sure unused high bits are cleared | |
104 | clearUnusedBits(); | |
105 | } | |
106 | ||
107 | APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) | |
108 | : BitWidth(numBits), VAL(0) { | |
109 | initFromArray(bigVal); | |
110 | } | |
111 | ||
112 | APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) | |
113 | : BitWidth(numBits), VAL(0) { | |
114 | initFromArray(makeArrayRef(bigVal, numWords)); | |
115 | } | |
116 | ||
117 | APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) | |
118 | : BitWidth(numbits), VAL(0) { | |
119 | assert(BitWidth && "Bitwidth too small"); | |
120 | fromString(numbits, Str, radix); | |
121 | } | |
122 | ||
123 | APInt& APInt::AssignSlowCase(const APInt& RHS) { | |
124 | // Don't do anything for X = X | |
125 | if (this == &RHS) | |
126 | return *this; | |
127 | ||
128 | if (BitWidth == RHS.getBitWidth()) { | |
129 | // assume same bit-width single-word case is already handled | |
130 | assert(!isSingleWord()); | |
131 | memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); | |
132 | return *this; | |
133 | } | |
134 | ||
135 | if (isSingleWord()) { | |
136 | // assume case where both are single words is already handled | |
137 | assert(!RHS.isSingleWord()); | |
138 | VAL = 0; | |
139 | pVal = getMemory(RHS.getNumWords()); | |
140 | memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); | |
141 | } else if (getNumWords() == RHS.getNumWords()) | |
142 | memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); | |
143 | else if (RHS.isSingleWord()) { | |
144 | delete [] pVal; | |
145 | VAL = RHS.VAL; | |
146 | } else { | |
147 | delete [] pVal; | |
148 | pVal = getMemory(RHS.getNumWords()); | |
149 | memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); | |
150 | } | |
151 | BitWidth = RHS.BitWidth; | |
152 | return clearUnusedBits(); | |
153 | } | |
154 | ||
155 | APInt& APInt::operator=(uint64_t RHS) { | |
156 | if (isSingleWord()) | |
157 | VAL = RHS; | |
158 | else { | |
159 | pVal[0] = RHS; | |
160 | memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | |
161 | } | |
162 | return clearUnusedBits(); | |
163 | } | |
164 | ||
165 | /// Profile - This method 'profiles' an APInt for use with FoldingSet. | |
166 | void APInt::Profile(FoldingSetNodeID& ID) const { | |
167 | ID.AddInteger(BitWidth); | |
168 | ||
169 | if (isSingleWord()) { | |
170 | ID.AddInteger(VAL); | |
171 | return; | |
172 | } | |
173 | ||
174 | unsigned NumWords = getNumWords(); | |
175 | for (unsigned i = 0; i < NumWords; ++i) | |
176 | ID.AddInteger(pVal[i]); | |
177 | } | |
178 | ||
179 | /// add_1 - This function adds a single "digit" integer, y, to the multiple | |
180 | /// "digit" integer array, x[]. x[] is modified to reflect the addition and | |
181 | /// 1 is returned if there is a carry out, otherwise 0 is returned. | |
182 | /// @returns the carry of the addition. | |
183 | static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { | |
184 | for (unsigned i = 0; i < len; ++i) { | |
185 | dest[i] = y + x[i]; | |
186 | if (dest[i] < y) | |
187 | y = 1; // Carry one to next digit. | |
188 | else { | |
189 | y = 0; // No need to carry so exit early | |
190 | break; | |
191 | } | |
192 | } | |
193 | return y; | |
194 | } | |
195 | ||
196 | /// @brief Prefix increment operator. Increments the APInt by one. | |
197 | APInt& APInt::operator++() { | |
198 | if (isSingleWord()) | |
199 | ++VAL; | |
200 | else | |
201 | add_1(pVal, pVal, getNumWords(), 1); | |
202 | return clearUnusedBits(); | |
203 | } | |
204 | ||
205 | /// sub_1 - This function subtracts a single "digit" (64-bit word), y, from | |
206 | /// the multi-digit integer array, x[], propagating the borrowed 1 value until | |
207 | /// no further borrowing is neeeded or it runs out of "digits" in x. The result | |
208 | /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. | |
209 | /// In other words, if y > x then this function returns 1, otherwise 0. | |
210 | /// @returns the borrow out of the subtraction | |
211 | static bool sub_1(uint64_t x[], unsigned len, uint64_t y) { | |
212 | for (unsigned i = 0; i < len; ++i) { | |
213 | uint64_t X = x[i]; | |
214 | x[i] -= y; | |
215 | if (y > X) | |
216 | y = 1; // We have to "borrow 1" from next "digit" | |
217 | else { | |
218 | y = 0; // No need to borrow | |
219 | break; // Remaining digits are unchanged so exit early | |
220 | } | |
221 | } | |
222 | return bool(y); | |
223 | } | |
224 | ||
225 | /// @brief Prefix decrement operator. Decrements the APInt by one. | |
226 | APInt& APInt::operator--() { | |
227 | if (isSingleWord()) | |
228 | --VAL; | |
229 | else | |
230 | sub_1(pVal, getNumWords(), 1); | |
231 | return clearUnusedBits(); | |
232 | } | |
233 | ||
234 | /// add - This function adds the integer array x to the integer array Y and | |
235 | /// places the result in dest. | |
236 | /// @returns the carry out from the addition | |
237 | /// @brief General addition of 64-bit integer arrays | |
238 | static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, | |
239 | unsigned len) { | |
240 | bool carry = false; | |
241 | for (unsigned i = 0; i< len; ++i) { | |
242 | uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x | |
243 | dest[i] = x[i] + y[i] + carry; | |
244 | carry = dest[i] < limit || (carry && dest[i] == limit); | |
245 | } | |
246 | return carry; | |
247 | } | |
248 | ||
249 | /// Adds the RHS APint to this APInt. | |
250 | /// @returns this, after addition of RHS. | |
251 | /// @brief Addition assignment operator. | |
252 | APInt& APInt::operator+=(const APInt& RHS) { | |
253 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
254 | if (isSingleWord()) | |
255 | VAL += RHS.VAL; | |
256 | else { | |
257 | add(pVal, pVal, RHS.pVal, getNumWords()); | |
258 | } | |
259 | return clearUnusedBits(); | |
260 | } | |
261 | ||
262 | /// Subtracts the integer array y from the integer array x | |
263 | /// @returns returns the borrow out. | |
264 | /// @brief Generalized subtraction of 64-bit integer arrays. | |
265 | static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, | |
266 | unsigned len) { | |
267 | bool borrow = false; | |
268 | for (unsigned i = 0; i < len; ++i) { | |
269 | uint64_t x_tmp = borrow ? x[i] - 1 : x[i]; | |
270 | borrow = y[i] > x_tmp || (borrow && x[i] == 0); | |
271 | dest[i] = x_tmp - y[i]; | |
272 | } | |
273 | return borrow; | |
274 | } | |
275 | ||
276 | /// Subtracts the RHS APInt from this APInt | |
277 | /// @returns this, after subtraction | |
278 | /// @brief Subtraction assignment operator. | |
279 | APInt& APInt::operator-=(const APInt& RHS) { | |
280 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
281 | if (isSingleWord()) | |
282 | VAL -= RHS.VAL; | |
283 | else | |
284 | sub(pVal, pVal, RHS.pVal, getNumWords()); | |
285 | return clearUnusedBits(); | |
286 | } | |
287 | ||
288 | /// Multiplies an integer array, x, by a uint64_t integer and places the result | |
289 | /// into dest. | |
290 | /// @returns the carry out of the multiplication. | |
291 | /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer. | |
292 | static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { | |
293 | // Split y into high 32-bit part (hy) and low 32-bit part (ly) | |
294 | uint64_t ly = y & 0xffffffffULL, hy = y >> 32; | |
295 | uint64_t carry = 0; | |
296 | ||
297 | // For each digit of x. | |
298 | for (unsigned i = 0; i < len; ++i) { | |
299 | // Split x into high and low words | |
300 | uint64_t lx = x[i] & 0xffffffffULL; | |
301 | uint64_t hx = x[i] >> 32; | |
302 | // hasCarry - A flag to indicate if there is a carry to the next digit. | |
303 | // hasCarry == 0, no carry | |
304 | // hasCarry == 1, has carry | |
305 | // hasCarry == 2, no carry and the calculation result == 0. | |
306 | uint8_t hasCarry = 0; | |
307 | dest[i] = carry + lx * ly; | |
308 | // Determine if the add above introduces carry. | |
309 | hasCarry = (dest[i] < carry) ? 1 : 0; | |
310 | carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0); | |
311 | // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + | |
312 | // (2^32 - 1) + 2^32 = 2^64. | |
313 | hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); | |
314 | ||
315 | carry += (lx * hy) & 0xffffffffULL; | |
316 | dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL); | |
317 | carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + | |
318 | (carry >> 32) + ((lx * hy) >> 32) + hx * hy; | |
319 | } | |
320 | return carry; | |
321 | } | |
322 | ||
323 | /// Multiplies integer array x by integer array y and stores the result into | |
324 | /// the integer array dest. Note that dest's size must be >= xlen + ylen. | |
325 | /// @brief Generalized multiplicate of integer arrays. | |
326 | static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], | |
327 | unsigned ylen) { | |
328 | dest[xlen] = mul_1(dest, x, xlen, y[0]); | |
329 | for (unsigned i = 1; i < ylen; ++i) { | |
330 | uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32; | |
331 | uint64_t carry = 0, lx = 0, hx = 0; | |
332 | for (unsigned j = 0; j < xlen; ++j) { | |
333 | lx = x[j] & 0xffffffffULL; | |
334 | hx = x[j] >> 32; | |
335 | // hasCarry - A flag to indicate if has carry. | |
336 | // hasCarry == 0, no carry | |
337 | // hasCarry == 1, has carry | |
338 | // hasCarry == 2, no carry and the calculation result == 0. | |
339 | uint8_t hasCarry = 0; | |
340 | uint64_t resul = carry + lx * ly; | |
341 | hasCarry = (resul < carry) ? 1 : 0; | |
342 | carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32); | |
343 | hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); | |
344 | ||
345 | carry += (lx * hy) & 0xffffffffULL; | |
346 | resul = (carry << 32) | (resul & 0xffffffffULL); | |
347 | dest[i+j] += resul; | |
348 | carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+ | |
349 | (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + | |
350 | ((lx * hy) >> 32) + hx * hy; | |
351 | } | |
352 | dest[i+xlen] = carry; | |
353 | } | |
354 | } | |
355 | ||
356 | APInt& APInt::operator*=(const APInt& RHS) { | |
357 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
358 | if (isSingleWord()) { | |
359 | VAL *= RHS.VAL; | |
360 | clearUnusedBits(); | |
361 | return *this; | |
362 | } | |
363 | ||
364 | // Get some bit facts about LHS and check for zero | |
365 | unsigned lhsBits = getActiveBits(); | |
366 | unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; | |
367 | if (!lhsWords) | |
368 | // 0 * X ===> 0 | |
369 | return *this; | |
370 | ||
371 | // Get some bit facts about RHS and check for zero | |
372 | unsigned rhsBits = RHS.getActiveBits(); | |
373 | unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1; | |
374 | if (!rhsWords) { | |
375 | // X * 0 ===> 0 | |
376 | clearAllBits(); | |
377 | return *this; | |
378 | } | |
379 | ||
380 | // Allocate space for the result | |
381 | unsigned destWords = rhsWords + lhsWords; | |
382 | uint64_t *dest = getMemory(destWords); | |
383 | ||
384 | // Perform the long multiply | |
385 | mul(dest, pVal, lhsWords, RHS.pVal, rhsWords); | |
386 | ||
387 | // Copy result back into *this | |
388 | clearAllBits(); | |
389 | unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords; | |
390 | memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); | |
391 | clearUnusedBits(); | |
392 | ||
393 | // delete dest array and return | |
394 | delete[] dest; | |
395 | return *this; | |
396 | } | |
397 | ||
398 | APInt& APInt::operator&=(const APInt& RHS) { | |
399 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
400 | if (isSingleWord()) { | |
401 | VAL &= RHS.VAL; | |
402 | return *this; | |
403 | } | |
404 | unsigned numWords = getNumWords(); | |
405 | for (unsigned i = 0; i < numWords; ++i) | |
406 | pVal[i] &= RHS.pVal[i]; | |
407 | return *this; | |
408 | } | |
409 | ||
410 | APInt& APInt::operator|=(const APInt& RHS) { | |
411 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
412 | if (isSingleWord()) { | |
413 | VAL |= RHS.VAL; | |
414 | return *this; | |
415 | } | |
416 | unsigned numWords = getNumWords(); | |
417 | for (unsigned i = 0; i < numWords; ++i) | |
418 | pVal[i] |= RHS.pVal[i]; | |
419 | return *this; | |
420 | } | |
421 | ||
422 | APInt& APInt::operator^=(const APInt& RHS) { | |
423 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
424 | if (isSingleWord()) { | |
425 | VAL ^= RHS.VAL; | |
426 | this->clearUnusedBits(); | |
427 | return *this; | |
428 | } | |
429 | unsigned numWords = getNumWords(); | |
430 | for (unsigned i = 0; i < numWords; ++i) | |
431 | pVal[i] ^= RHS.pVal[i]; | |
432 | return clearUnusedBits(); | |
433 | } | |
434 | ||
435 | APInt APInt::AndSlowCase(const APInt& RHS) const { | |
436 | unsigned numWords = getNumWords(); | |
437 | uint64_t* val = getMemory(numWords); | |
438 | for (unsigned i = 0; i < numWords; ++i) | |
439 | val[i] = pVal[i] & RHS.pVal[i]; | |
440 | return APInt(val, getBitWidth()); | |
441 | } | |
442 | ||
443 | APInt APInt::OrSlowCase(const APInt& RHS) const { | |
444 | unsigned numWords = getNumWords(); | |
445 | uint64_t *val = getMemory(numWords); | |
446 | for (unsigned i = 0; i < numWords; ++i) | |
447 | val[i] = pVal[i] | RHS.pVal[i]; | |
448 | return APInt(val, getBitWidth()); | |
449 | } | |
450 | ||
451 | APInt APInt::XorSlowCase(const APInt& RHS) const { | |
452 | unsigned numWords = getNumWords(); | |
453 | uint64_t *val = getMemory(numWords); | |
454 | for (unsigned i = 0; i < numWords; ++i) | |
455 | val[i] = pVal[i] ^ RHS.pVal[i]; | |
456 | ||
457 | // 0^0==1 so clear the high bits in case they got set. | |
458 | return APInt(val, getBitWidth()).clearUnusedBits(); | |
459 | } | |
460 | ||
461 | APInt APInt::operator*(const APInt& RHS) const { | |
462 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
463 | if (isSingleWord()) | |
464 | return APInt(BitWidth, VAL * RHS.VAL); | |
465 | APInt Result(*this); | |
466 | Result *= RHS; | |
467 | return Result; | |
468 | } | |
469 | ||
470 | APInt APInt::operator+(const APInt& RHS) const { | |
471 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
472 | if (isSingleWord()) | |
473 | return APInt(BitWidth, VAL + RHS.VAL); | |
474 | APInt Result(BitWidth, 0); | |
475 | add(Result.pVal, this->pVal, RHS.pVal, getNumWords()); | |
476 | return Result.clearUnusedBits(); | |
477 | } | |
478 | ||
479 | APInt APInt::operator-(const APInt& RHS) const { | |
480 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
481 | if (isSingleWord()) | |
482 | return APInt(BitWidth, VAL - RHS.VAL); | |
483 | APInt Result(BitWidth, 0); | |
484 | sub(Result.pVal, this->pVal, RHS.pVal, getNumWords()); | |
485 | return Result.clearUnusedBits(); | |
486 | } | |
487 | ||
488 | bool APInt::EqualSlowCase(const APInt& RHS) const { | |
489 | // Get some facts about the number of bits used in the two operands. | |
490 | unsigned n1 = getActiveBits(); | |
491 | unsigned n2 = RHS.getActiveBits(); | |
492 | ||
493 | // If the number of bits isn't the same, they aren't equal | |
494 | if (n1 != n2) | |
495 | return false; | |
496 | ||
497 | // If the number of bits fits in a word, we only need to compare the low word. | |
498 | if (n1 <= APINT_BITS_PER_WORD) | |
499 | return pVal[0] == RHS.pVal[0]; | |
500 | ||
501 | // Otherwise, compare everything | |
502 | for (int i = whichWord(n1 - 1); i >= 0; --i) | |
503 | if (pVal[i] != RHS.pVal[i]) | |
504 | return false; | |
505 | return true; | |
506 | } | |
507 | ||
508 | bool APInt::EqualSlowCase(uint64_t Val) const { | |
509 | unsigned n = getActiveBits(); | |
510 | if (n <= APINT_BITS_PER_WORD) | |
511 | return pVal[0] == Val; | |
512 | else | |
513 | return false; | |
514 | } | |
515 | ||
516 | bool APInt::ult(const APInt& RHS) const { | |
517 | assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); | |
518 | if (isSingleWord()) | |
519 | return VAL < RHS.VAL; | |
520 | ||
521 | // Get active bit length of both operands | |
522 | unsigned n1 = getActiveBits(); | |
523 | unsigned n2 = RHS.getActiveBits(); | |
524 | ||
525 | // If magnitude of LHS is less than RHS, return true. | |
526 | if (n1 < n2) | |
527 | return true; | |
528 | ||
529 | // If magnitude of RHS is greather than LHS, return false. | |
530 | if (n2 < n1) | |
531 | return false; | |
532 | ||
533 | // If they bot fit in a word, just compare the low order word | |
534 | if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) | |
535 | return pVal[0] < RHS.pVal[0]; | |
536 | ||
537 | // Otherwise, compare all words | |
538 | unsigned topWord = whichWord(std::max(n1,n2)-1); | |
539 | for (int i = topWord; i >= 0; --i) { | |
540 | if (pVal[i] > RHS.pVal[i]) | |
541 | return false; | |
542 | if (pVal[i] < RHS.pVal[i]) | |
543 | return true; | |
544 | } | |
545 | return false; | |
546 | } | |
547 | ||
548 | bool APInt::slt(const APInt& RHS) const { | |
549 | assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); | |
550 | if (isSingleWord()) { | |
551 | int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth); | |
552 | int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth); | |
553 | return lhsSext < rhsSext; | |
554 | } | |
555 | ||
556 | APInt lhs(*this); | |
557 | APInt rhs(RHS); | |
558 | bool lhsNeg = isNegative(); | |
559 | bool rhsNeg = rhs.isNegative(); | |
560 | if (lhsNeg) { | |
561 | // Sign bit is set so perform two's complement to make it positive | |
562 | lhs.flipAllBits(); | |
1a4d82fc | 563 | ++lhs; |
223e47cc LB |
564 | } |
565 | if (rhsNeg) { | |
566 | // Sign bit is set so perform two's complement to make it positive | |
567 | rhs.flipAllBits(); | |
1a4d82fc | 568 | ++rhs; |
223e47cc LB |
569 | } |
570 | ||
571 | // Now we have unsigned values to compare so do the comparison if necessary | |
572 | // based on the negativeness of the values. | |
573 | if (lhsNeg) | |
574 | if (rhsNeg) | |
575 | return lhs.ugt(rhs); | |
576 | else | |
577 | return true; | |
578 | else if (rhsNeg) | |
579 | return false; | |
580 | else | |
581 | return lhs.ult(rhs); | |
582 | } | |
583 | ||
584 | void APInt::setBit(unsigned bitPosition) { | |
585 | if (isSingleWord()) | |
586 | VAL |= maskBit(bitPosition); | |
587 | else | |
588 | pVal[whichWord(bitPosition)] |= maskBit(bitPosition); | |
589 | } | |
590 | ||
591 | /// Set the given bit to 0 whose position is given as "bitPosition". | |
592 | /// @brief Set a given bit to 0. | |
593 | void APInt::clearBit(unsigned bitPosition) { | |
594 | if (isSingleWord()) | |
595 | VAL &= ~maskBit(bitPosition); | |
596 | else | |
597 | pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); | |
598 | } | |
599 | ||
600 | /// @brief Toggle every bit to its opposite value. | |
601 | ||
602 | /// Toggle a given bit to its opposite value whose position is given | |
603 | /// as "bitPosition". | |
604 | /// @brief Toggles a given bit to its opposite value. | |
605 | void APInt::flipBit(unsigned bitPosition) { | |
606 | assert(bitPosition < BitWidth && "Out of the bit-width range!"); | |
607 | if ((*this)[bitPosition]) clearBit(bitPosition); | |
608 | else setBit(bitPosition); | |
609 | } | |
610 | ||
611 | unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { | |
612 | assert(!str.empty() && "Invalid string length"); | |
613 | assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || | |
614 | radix == 36) && | |
615 | "Radix should be 2, 8, 10, 16, or 36!"); | |
616 | ||
617 | size_t slen = str.size(); | |
618 | ||
619 | // Each computation below needs to know if it's negative. | |
620 | StringRef::iterator p = str.begin(); | |
621 | unsigned isNegative = *p == '-'; | |
622 | if (*p == '-' || *p == '+') { | |
623 | p++; | |
624 | slen--; | |
625 | assert(slen && "String is only a sign, needs a value."); | |
626 | } | |
627 | ||
628 | // For radixes of power-of-two values, the bits required is accurately and | |
629 | // easily computed | |
630 | if (radix == 2) | |
631 | return slen + isNegative; | |
632 | if (radix == 8) | |
633 | return slen * 3 + isNegative; | |
634 | if (radix == 16) | |
635 | return slen * 4 + isNegative; | |
636 | ||
637 | // FIXME: base 36 | |
638 | ||
639 | // This is grossly inefficient but accurate. We could probably do something | |
640 | // with a computation of roughly slen*64/20 and then adjust by the value of | |
641 | // the first few digits. But, I'm not sure how accurate that could be. | |
642 | ||
643 | // Compute a sufficient number of bits that is always large enough but might | |
644 | // be too large. This avoids the assertion in the constructor. This | |
645 | // calculation doesn't work appropriately for the numbers 0-9, so just use 4 | |
646 | // bits in that case. | |
647 | unsigned sufficient | |
648 | = radix == 10? (slen == 1 ? 4 : slen * 64/18) | |
649 | : (slen == 1 ? 7 : slen * 16/3); | |
650 | ||
651 | // Convert to the actual binary value. | |
652 | APInt tmp(sufficient, StringRef(p, slen), radix); | |
653 | ||
654 | // Compute how many bits are required. If the log is infinite, assume we need | |
655 | // just bit. | |
656 | unsigned log = tmp.logBase2(); | |
657 | if (log == (unsigned)-1) { | |
658 | return isNegative + 1; | |
659 | } else { | |
660 | return isNegative + log + 1; | |
661 | } | |
662 | } | |
663 | ||
664 | hash_code llvm::hash_value(const APInt &Arg) { | |
665 | if (Arg.isSingleWord()) | |
666 | return hash_combine(Arg.VAL); | |
667 | ||
668 | return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords()); | |
669 | } | |
670 | ||
671 | /// HiBits - This function returns the high "numBits" bits of this APInt. | |
672 | APInt APInt::getHiBits(unsigned numBits) const { | |
673 | return APIntOps::lshr(*this, BitWidth - numBits); | |
674 | } | |
675 | ||
676 | /// LoBits - This function returns the low "numBits" bits of this APInt. | |
677 | APInt APInt::getLoBits(unsigned numBits) const { | |
678 | return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), | |
679 | BitWidth - numBits); | |
680 | } | |
681 | ||
682 | unsigned APInt::countLeadingZerosSlowCase() const { | |
683 | // Treat the most significand word differently because it might have | |
684 | // meaningless bits set beyond the precision. | |
685 | unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD; | |
686 | integerPart MSWMask; | |
687 | if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1; | |
688 | else { | |
689 | MSWMask = ~integerPart(0); | |
690 | BitsInMSW = APINT_BITS_PER_WORD; | |
691 | } | |
692 | ||
693 | unsigned i = getNumWords(); | |
694 | integerPart MSW = pVal[i-1] & MSWMask; | |
695 | if (MSW) | |
1a4d82fc | 696 | return llvm::countLeadingZeros(MSW) - (APINT_BITS_PER_WORD - BitsInMSW); |
223e47cc LB |
697 | |
698 | unsigned Count = BitsInMSW; | |
699 | for (--i; i > 0u; --i) { | |
700 | if (pVal[i-1] == 0) | |
701 | Count += APINT_BITS_PER_WORD; | |
702 | else { | |
1a4d82fc | 703 | Count += llvm::countLeadingZeros(pVal[i-1]); |
223e47cc LB |
704 | break; |
705 | } | |
706 | } | |
707 | return Count; | |
708 | } | |
709 | ||
710 | unsigned APInt::countLeadingOnes() const { | |
711 | if (isSingleWord()) | |
712 | return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth)); | |
713 | ||
714 | unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; | |
715 | unsigned shift; | |
716 | if (!highWordBits) { | |
717 | highWordBits = APINT_BITS_PER_WORD; | |
718 | shift = 0; | |
719 | } else { | |
720 | shift = APINT_BITS_PER_WORD - highWordBits; | |
721 | } | |
722 | int i = getNumWords() - 1; | |
723 | unsigned Count = CountLeadingOnes_64(pVal[i] << shift); | |
724 | if (Count == highWordBits) { | |
725 | for (i--; i >= 0; --i) { | |
726 | if (pVal[i] == -1ULL) | |
727 | Count += APINT_BITS_PER_WORD; | |
728 | else { | |
729 | Count += CountLeadingOnes_64(pVal[i]); | |
730 | break; | |
731 | } | |
732 | } | |
733 | } | |
734 | return Count; | |
735 | } | |
736 | ||
737 | unsigned APInt::countTrailingZeros() const { | |
738 | if (isSingleWord()) | |
1a4d82fc | 739 | return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth); |
223e47cc LB |
740 | unsigned Count = 0; |
741 | unsigned i = 0; | |
742 | for (; i < getNumWords() && pVal[i] == 0; ++i) | |
743 | Count += APINT_BITS_PER_WORD; | |
744 | if (i < getNumWords()) | |
1a4d82fc | 745 | Count += llvm::countTrailingZeros(pVal[i]); |
223e47cc LB |
746 | return std::min(Count, BitWidth); |
747 | } | |
748 | ||
749 | unsigned APInt::countTrailingOnesSlowCase() const { | |
750 | unsigned Count = 0; | |
751 | unsigned i = 0; | |
752 | for (; i < getNumWords() && pVal[i] == -1ULL; ++i) | |
753 | Count += APINT_BITS_PER_WORD; | |
754 | if (i < getNumWords()) | |
755 | Count += CountTrailingOnes_64(pVal[i]); | |
756 | return std::min(Count, BitWidth); | |
757 | } | |
758 | ||
759 | unsigned APInt::countPopulationSlowCase() const { | |
760 | unsigned Count = 0; | |
761 | for (unsigned i = 0; i < getNumWords(); ++i) | |
762 | Count += CountPopulation_64(pVal[i]); | |
763 | return Count; | |
764 | } | |
765 | ||
766 | /// Perform a logical right-shift from Src to Dst, which must be equal or | |
767 | /// non-overlapping, of Words words, by Shift, which must be less than 64. | |
768 | static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words, | |
769 | unsigned Shift) { | |
770 | uint64_t Carry = 0; | |
771 | for (int I = Words - 1; I >= 0; --I) { | |
772 | uint64_t Tmp = Src[I]; | |
773 | Dst[I] = (Tmp >> Shift) | Carry; | |
774 | Carry = Tmp << (64 - Shift); | |
775 | } | |
776 | } | |
777 | ||
778 | APInt APInt::byteSwap() const { | |
779 | assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); | |
780 | if (BitWidth == 16) | |
781 | return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); | |
782 | if (BitWidth == 32) | |
783 | return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); | |
784 | if (BitWidth == 48) { | |
785 | unsigned Tmp1 = unsigned(VAL >> 16); | |
786 | Tmp1 = ByteSwap_32(Tmp1); | |
787 | uint16_t Tmp2 = uint16_t(VAL); | |
788 | Tmp2 = ByteSwap_16(Tmp2); | |
789 | return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); | |
790 | } | |
791 | if (BitWidth == 64) | |
792 | return APInt(BitWidth, ByteSwap_64(VAL)); | |
793 | ||
794 | APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); | |
795 | for (unsigned I = 0, N = getNumWords(); I != N; ++I) | |
796 | Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); | |
797 | if (Result.BitWidth != BitWidth) { | |
798 | lshrNear(Result.pVal, Result.pVal, getNumWords(), | |
799 | Result.BitWidth - BitWidth); | |
800 | Result.BitWidth = BitWidth; | |
801 | } | |
802 | return Result; | |
803 | } | |
804 | ||
805 | APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, | |
806 | const APInt& API2) { | |
807 | APInt A = API1, B = API2; | |
808 | while (!!B) { | |
809 | APInt T = B; | |
810 | B = APIntOps::urem(A, B); | |
811 | A = T; | |
812 | } | |
813 | return A; | |
814 | } | |
815 | ||
816 | APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { | |
817 | union { | |
818 | double D; | |
819 | uint64_t I; | |
820 | } T; | |
821 | T.D = Double; | |
822 | ||
823 | // Get the sign bit from the highest order bit | |
824 | bool isNeg = T.I >> 63; | |
825 | ||
826 | // Get the 11-bit exponent and adjust for the 1023 bit bias | |
827 | int64_t exp = ((T.I >> 52) & 0x7ff) - 1023; | |
828 | ||
829 | // If the exponent is negative, the value is < 0 so just return 0. | |
830 | if (exp < 0) | |
831 | return APInt(width, 0u); | |
832 | ||
833 | // Extract the mantissa by clearing the top 12 bits (sign + exponent). | |
834 | uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52; | |
835 | ||
836 | // If the exponent doesn't shift all bits out of the mantissa | |
837 | if (exp < 52) | |
838 | return isNeg ? -APInt(width, mantissa >> (52 - exp)) : | |
839 | APInt(width, mantissa >> (52 - exp)); | |
840 | ||
841 | // If the client didn't provide enough bits for us to shift the mantissa into | |
842 | // then the result is undefined, just return 0 | |
843 | if (width <= exp - 52) | |
844 | return APInt(width, 0); | |
845 | ||
846 | // Otherwise, we have to shift the mantissa bits up to the right location | |
847 | APInt Tmp(width, mantissa); | |
848 | Tmp = Tmp.shl((unsigned)exp - 52); | |
849 | return isNeg ? -Tmp : Tmp; | |
850 | } | |
851 | ||
852 | /// RoundToDouble - This function converts this APInt to a double. | |
853 | /// The layout for double is as following (IEEE Standard 754): | |
854 | /// -------------------------------------- | |
855 | /// | Sign Exponent Fraction Bias | | |
856 | /// |-------------------------------------- | | |
857 | /// | 1[63] 11[62-52] 52[51-00] 1023 | | |
858 | /// -------------------------------------- | |
859 | double APInt::roundToDouble(bool isSigned) const { | |
860 | ||
861 | // Handle the simple case where the value is contained in one uint64_t. | |
862 | // It is wrong to optimize getWord(0) to VAL; there might be more than one word. | |
863 | if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { | |
864 | if (isSigned) { | |
865 | int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth); | |
866 | return double(sext); | |
867 | } else | |
868 | return double(getWord(0)); | |
869 | } | |
870 | ||
871 | // Determine if the value is negative. | |
872 | bool isNeg = isSigned ? (*this)[BitWidth-1] : false; | |
873 | ||
874 | // Construct the absolute value if we're negative. | |
875 | APInt Tmp(isNeg ? -(*this) : (*this)); | |
876 | ||
877 | // Figure out how many bits we're using. | |
878 | unsigned n = Tmp.getActiveBits(); | |
879 | ||
880 | // The exponent (without bias normalization) is just the number of bits | |
881 | // we are using. Note that the sign bit is gone since we constructed the | |
882 | // absolute value. | |
883 | uint64_t exp = n; | |
884 | ||
885 | // Return infinity for exponent overflow | |
886 | if (exp > 1023) { | |
887 | if (!isSigned || !isNeg) | |
888 | return std::numeric_limits<double>::infinity(); | |
889 | else | |
890 | return -std::numeric_limits<double>::infinity(); | |
891 | } | |
892 | exp += 1023; // Increment for 1023 bias | |
893 | ||
894 | // Number of bits in mantissa is 52. To obtain the mantissa value, we must | |
895 | // extract the high 52 bits from the correct words in pVal. | |
896 | uint64_t mantissa; | |
897 | unsigned hiWord = whichWord(n-1); | |
898 | if (hiWord == 0) { | |
899 | mantissa = Tmp.pVal[0]; | |
900 | if (n > 52) | |
901 | mantissa >>= n - 52; // shift down, we want the top 52 bits. | |
902 | } else { | |
903 | assert(hiWord > 0 && "huh?"); | |
904 | uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); | |
905 | uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); | |
906 | mantissa = hibits | lobits; | |
907 | } | |
908 | ||
909 | // The leading bit of mantissa is implicit, so get rid of it. | |
910 | uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; | |
911 | union { | |
912 | double D; | |
913 | uint64_t I; | |
914 | } T; | |
915 | T.I = sign | (exp << 52) | mantissa; | |
916 | return T.D; | |
917 | } | |
918 | ||
919 | // Truncate to new width. | |
920 | APInt APInt::trunc(unsigned width) const { | |
921 | assert(width < BitWidth && "Invalid APInt Truncate request"); | |
922 | assert(width && "Can't truncate to 0 bits"); | |
923 | ||
924 | if (width <= APINT_BITS_PER_WORD) | |
925 | return APInt(width, getRawData()[0]); | |
926 | ||
927 | APInt Result(getMemory(getNumWords(width)), width); | |
928 | ||
929 | // Copy full words. | |
930 | unsigned i; | |
931 | for (i = 0; i != width / APINT_BITS_PER_WORD; i++) | |
932 | Result.pVal[i] = pVal[i]; | |
933 | ||
934 | // Truncate and copy any partial word. | |
935 | unsigned bits = (0 - width) % APINT_BITS_PER_WORD; | |
936 | if (bits != 0) | |
937 | Result.pVal[i] = pVal[i] << bits >> bits; | |
938 | ||
939 | return Result; | |
940 | } | |
941 | ||
942 | // Sign extend to a new width. | |
943 | APInt APInt::sext(unsigned width) const { | |
944 | assert(width > BitWidth && "Invalid APInt SignExtend request"); | |
945 | ||
946 | if (width <= APINT_BITS_PER_WORD) { | |
947 | uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth); | |
948 | val = (int64_t)val >> (width - BitWidth); | |
949 | return APInt(width, val >> (APINT_BITS_PER_WORD - width)); | |
950 | } | |
951 | ||
952 | APInt Result(getMemory(getNumWords(width)), width); | |
953 | ||
954 | // Copy full words. | |
955 | unsigned i; | |
956 | uint64_t word = 0; | |
957 | for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) { | |
958 | word = getRawData()[i]; | |
959 | Result.pVal[i] = word; | |
960 | } | |
961 | ||
962 | // Read and sign-extend any partial word. | |
963 | unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD; | |
964 | if (bits != 0) | |
965 | word = (int64_t)getRawData()[i] << bits >> bits; | |
966 | else | |
967 | word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); | |
968 | ||
969 | // Write remaining full words. | |
970 | for (; i != width / APINT_BITS_PER_WORD; i++) { | |
971 | Result.pVal[i] = word; | |
972 | word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); | |
973 | } | |
974 | ||
975 | // Write any partial word. | |
976 | bits = (0 - width) % APINT_BITS_PER_WORD; | |
977 | if (bits != 0) | |
978 | Result.pVal[i] = word << bits >> bits; | |
979 | ||
980 | return Result; | |
981 | } | |
982 | ||
983 | // Zero extend to a new width. | |
984 | APInt APInt::zext(unsigned width) const { | |
985 | assert(width > BitWidth && "Invalid APInt ZeroExtend request"); | |
986 | ||
987 | if (width <= APINT_BITS_PER_WORD) | |
988 | return APInt(width, VAL); | |
989 | ||
990 | APInt Result(getMemory(getNumWords(width)), width); | |
991 | ||
992 | // Copy words. | |
993 | unsigned i; | |
994 | for (i = 0; i != getNumWords(); i++) | |
995 | Result.pVal[i] = getRawData()[i]; | |
996 | ||
997 | // Zero remaining words. | |
998 | memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE); | |
999 | ||
1000 | return Result; | |
1001 | } | |
1002 | ||
1003 | APInt APInt::zextOrTrunc(unsigned width) const { | |
1004 | if (BitWidth < width) | |
1005 | return zext(width); | |
1006 | if (BitWidth > width) | |
1007 | return trunc(width); | |
1008 | return *this; | |
1009 | } | |
1010 | ||
1011 | APInt APInt::sextOrTrunc(unsigned width) const { | |
1012 | if (BitWidth < width) | |
1013 | return sext(width); | |
1014 | if (BitWidth > width) | |
1015 | return trunc(width); | |
1016 | return *this; | |
1017 | } | |
1018 | ||
1019 | APInt APInt::zextOrSelf(unsigned width) const { | |
1020 | if (BitWidth < width) | |
1021 | return zext(width); | |
1022 | return *this; | |
1023 | } | |
1024 | ||
1025 | APInt APInt::sextOrSelf(unsigned width) const { | |
1026 | if (BitWidth < width) | |
1027 | return sext(width); | |
1028 | return *this; | |
1029 | } | |
1030 | ||
1031 | /// Arithmetic right-shift this APInt by shiftAmt. | |
1032 | /// @brief Arithmetic right-shift function. | |
1033 | APInt APInt::ashr(const APInt &shiftAmt) const { | |
1034 | return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); | |
1035 | } | |
1036 | ||
1037 | /// Arithmetic right-shift this APInt by shiftAmt. | |
1038 | /// @brief Arithmetic right-shift function. | |
1039 | APInt APInt::ashr(unsigned shiftAmt) const { | |
1040 | assert(shiftAmt <= BitWidth && "Invalid shift amount"); | |
1041 | // Handle a degenerate case | |
1042 | if (shiftAmt == 0) | |
1043 | return *this; | |
1044 | ||
1045 | // Handle single word shifts with built-in ashr | |
1046 | if (isSingleWord()) { | |
1047 | if (shiftAmt == BitWidth) | |
1048 | return APInt(BitWidth, 0); // undefined | |
1049 | else { | |
1050 | unsigned SignBit = APINT_BITS_PER_WORD - BitWidth; | |
1051 | return APInt(BitWidth, | |
1052 | (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt)); | |
1053 | } | |
1054 | } | |
1055 | ||
1056 | // If all the bits were shifted out, the result is, technically, undefined. | |
1057 | // We return -1 if it was negative, 0 otherwise. We check this early to avoid | |
1058 | // issues in the algorithm below. | |
1059 | if (shiftAmt == BitWidth) { | |
1060 | if (isNegative()) | |
1061 | return APInt(BitWidth, -1ULL, true); | |
1062 | else | |
1063 | return APInt(BitWidth, 0); | |
1064 | } | |
1065 | ||
1066 | // Create some space for the result. | |
1067 | uint64_t * val = new uint64_t[getNumWords()]; | |
1068 | ||
1069 | // Compute some values needed by the following shift algorithms | |
1070 | unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word | |
1071 | unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift | |
1072 | unsigned breakWord = getNumWords() - 1 - offset; // last word affected | |
1073 | unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? | |
1074 | if (bitsInWord == 0) | |
1075 | bitsInWord = APINT_BITS_PER_WORD; | |
1076 | ||
1077 | // If we are shifting whole words, just move whole words | |
1078 | if (wordShift == 0) { | |
1079 | // Move the words containing significant bits | |
1080 | for (unsigned i = 0; i <= breakWord; ++i) | |
1081 | val[i] = pVal[i+offset]; // move whole word | |
1082 | ||
1083 | // Adjust the top significant word for sign bit fill, if negative | |
1084 | if (isNegative()) | |
1085 | if (bitsInWord < APINT_BITS_PER_WORD) | |
1086 | val[breakWord] |= ~0ULL << bitsInWord; // set high bits | |
1087 | } else { | |
1088 | // Shift the low order words | |
1089 | for (unsigned i = 0; i < breakWord; ++i) { | |
1090 | // This combines the shifted corresponding word with the low bits from | |
1091 | // the next word (shifted into this word's high bits). | |
1092 | val[i] = (pVal[i+offset] >> wordShift) | | |
1093 | (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); | |
1094 | } | |
1095 | ||
1096 | // Shift the break word. In this case there are no bits from the next word | |
1097 | // to include in this word. | |
1098 | val[breakWord] = pVal[breakWord+offset] >> wordShift; | |
1099 | ||
1a4d82fc | 1100 | // Deal with sign extension in the break word, and possibly the word before |
223e47cc LB |
1101 | // it. |
1102 | if (isNegative()) { | |
1103 | if (wordShift > bitsInWord) { | |
1104 | if (breakWord > 0) | |
1105 | val[breakWord-1] |= | |
1106 | ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); | |
1107 | val[breakWord] |= ~0ULL; | |
1108 | } else | |
1109 | val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); | |
1110 | } | |
1111 | } | |
1112 | ||
1113 | // Remaining words are 0 or -1, just assign them. | |
1114 | uint64_t fillValue = (isNegative() ? -1ULL : 0); | |
1115 | for (unsigned i = breakWord+1; i < getNumWords(); ++i) | |
1116 | val[i] = fillValue; | |
1117 | return APInt(val, BitWidth).clearUnusedBits(); | |
1118 | } | |
1119 | ||
1120 | /// Logical right-shift this APInt by shiftAmt. | |
1121 | /// @brief Logical right-shift function. | |
1122 | APInt APInt::lshr(const APInt &shiftAmt) const { | |
1123 | return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth)); | |
1124 | } | |
1125 | ||
1126 | /// Logical right-shift this APInt by shiftAmt. | |
1127 | /// @brief Logical right-shift function. | |
1128 | APInt APInt::lshr(unsigned shiftAmt) const { | |
1129 | if (isSingleWord()) { | |
1130 | if (shiftAmt >= BitWidth) | |
1131 | return APInt(BitWidth, 0); | |
1132 | else | |
1133 | return APInt(BitWidth, this->VAL >> shiftAmt); | |
1134 | } | |
1135 | ||
1136 | // If all the bits were shifted out, the result is 0. This avoids issues | |
1137 | // with shifting by the size of the integer type, which produces undefined | |
1138 | // results. We define these "undefined results" to always be 0. | |
1139 | if (shiftAmt >= BitWidth) | |
1140 | return APInt(BitWidth, 0); | |
1141 | ||
1142 | // If none of the bits are shifted out, the result is *this. This avoids | |
1143 | // issues with shifting by the size of the integer type, which produces | |
1144 | // undefined results in the code below. This is also an optimization. | |
1145 | if (shiftAmt == 0) | |
1146 | return *this; | |
1147 | ||
1148 | // Create some space for the result. | |
1149 | uint64_t * val = new uint64_t[getNumWords()]; | |
1150 | ||
1151 | // If we are shifting less than a word, compute the shift with a simple carry | |
1152 | if (shiftAmt < APINT_BITS_PER_WORD) { | |
1153 | lshrNear(val, pVal, getNumWords(), shiftAmt); | |
1154 | return APInt(val, BitWidth).clearUnusedBits(); | |
1155 | } | |
1156 | ||
1157 | // Compute some values needed by the remaining shift algorithms | |
1158 | unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; | |
1159 | unsigned offset = shiftAmt / APINT_BITS_PER_WORD; | |
1160 | ||
1161 | // If we are shifting whole words, just move whole words | |
1162 | if (wordShift == 0) { | |
1163 | for (unsigned i = 0; i < getNumWords() - offset; ++i) | |
1164 | val[i] = pVal[i+offset]; | |
1165 | for (unsigned i = getNumWords()-offset; i < getNumWords(); i++) | |
1166 | val[i] = 0; | |
1167 | return APInt(val,BitWidth).clearUnusedBits(); | |
1168 | } | |
1169 | ||
1170 | // Shift the low order words | |
1171 | unsigned breakWord = getNumWords() - offset -1; | |
1172 | for (unsigned i = 0; i < breakWord; ++i) | |
1173 | val[i] = (pVal[i+offset] >> wordShift) | | |
1174 | (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); | |
1175 | // Shift the break word. | |
1176 | val[breakWord] = pVal[breakWord+offset] >> wordShift; | |
1177 | ||
1178 | // Remaining words are 0 | |
1179 | for (unsigned i = breakWord+1; i < getNumWords(); ++i) | |
1180 | val[i] = 0; | |
1181 | return APInt(val, BitWidth).clearUnusedBits(); | |
1182 | } | |
1183 | ||
1184 | /// Left-shift this APInt by shiftAmt. | |
1185 | /// @brief Left-shift function. | |
1186 | APInt APInt::shl(const APInt &shiftAmt) const { | |
1187 | // It's undefined behavior in C to shift by BitWidth or greater. | |
1188 | return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); | |
1189 | } | |
1190 | ||
1191 | APInt APInt::shlSlowCase(unsigned shiftAmt) const { | |
1192 | // If all the bits were shifted out, the result is 0. This avoids issues | |
1193 | // with shifting by the size of the integer type, which produces undefined | |
1194 | // results. We define these "undefined results" to always be 0. | |
1195 | if (shiftAmt == BitWidth) | |
1196 | return APInt(BitWidth, 0); | |
1197 | ||
1198 | // If none of the bits are shifted out, the result is *this. This avoids a | |
1199 | // lshr by the words size in the loop below which can produce incorrect | |
1200 | // results. It also avoids the expensive computation below for a common case. | |
1201 | if (shiftAmt == 0) | |
1202 | return *this; | |
1203 | ||
1204 | // Create some space for the result. | |
1205 | uint64_t * val = new uint64_t[getNumWords()]; | |
1206 | ||
1207 | // If we are shifting less than a word, do it the easy way | |
1208 | if (shiftAmt < APINT_BITS_PER_WORD) { | |
1209 | uint64_t carry = 0; | |
1210 | for (unsigned i = 0; i < getNumWords(); i++) { | |
1211 | val[i] = pVal[i] << shiftAmt | carry; | |
1212 | carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt); | |
1213 | } | |
1214 | return APInt(val, BitWidth).clearUnusedBits(); | |
1215 | } | |
1216 | ||
1217 | // Compute some values needed by the remaining shift algorithms | |
1218 | unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; | |
1219 | unsigned offset = shiftAmt / APINT_BITS_PER_WORD; | |
1220 | ||
1221 | // If we are shifting whole words, just move whole words | |
1222 | if (wordShift == 0) { | |
1223 | for (unsigned i = 0; i < offset; i++) | |
1224 | val[i] = 0; | |
1225 | for (unsigned i = offset; i < getNumWords(); i++) | |
1226 | val[i] = pVal[i-offset]; | |
1227 | return APInt(val,BitWidth).clearUnusedBits(); | |
1228 | } | |
1229 | ||
1230 | // Copy whole words from this to Result. | |
1231 | unsigned i = getNumWords() - 1; | |
1232 | for (; i > offset; --i) | |
1233 | val[i] = pVal[i-offset] << wordShift | | |
1234 | pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift); | |
1235 | val[offset] = pVal[0] << wordShift; | |
1236 | for (i = 0; i < offset; ++i) | |
1237 | val[i] = 0; | |
1238 | return APInt(val, BitWidth).clearUnusedBits(); | |
1239 | } | |
1240 | ||
1241 | APInt APInt::rotl(const APInt &rotateAmt) const { | |
1242 | return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth)); | |
1243 | } | |
1244 | ||
1245 | APInt APInt::rotl(unsigned rotateAmt) const { | |
1246 | rotateAmt %= BitWidth; | |
1247 | if (rotateAmt == 0) | |
1248 | return *this; | |
1249 | return shl(rotateAmt) | lshr(BitWidth - rotateAmt); | |
1250 | } | |
1251 | ||
1252 | APInt APInt::rotr(const APInt &rotateAmt) const { | |
1253 | return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth)); | |
1254 | } | |
1255 | ||
1256 | APInt APInt::rotr(unsigned rotateAmt) const { | |
1257 | rotateAmt %= BitWidth; | |
1258 | if (rotateAmt == 0) | |
1259 | return *this; | |
1260 | return lshr(rotateAmt) | shl(BitWidth - rotateAmt); | |
1261 | } | |
1262 | ||
1263 | // Square Root - this method computes and returns the square root of "this". | |
1264 | // Three mechanisms are used for computation. For small values (<= 5 bits), | |
1265 | // a table lookup is done. This gets some performance for common cases. For | |
1266 | // values using less than 52 bits, the value is converted to double and then | |
1267 | // the libc sqrt function is called. The result is rounded and then converted | |
1268 | // back to a uint64_t which is then used to construct the result. Finally, | |
1269 | // the Babylonian method for computing square roots is used. | |
1270 | APInt APInt::sqrt() const { | |
1271 | ||
1272 | // Determine the magnitude of the value. | |
1273 | unsigned magnitude = getActiveBits(); | |
1274 | ||
1275 | // Use a fast table for some small values. This also gets rid of some | |
1276 | // rounding errors in libc sqrt for small values. | |
1277 | if (magnitude <= 5) { | |
1278 | static const uint8_t results[32] = { | |
1279 | /* 0 */ 0, | |
1280 | /* 1- 2 */ 1, 1, | |
1281 | /* 3- 6 */ 2, 2, 2, 2, | |
1282 | /* 7-12 */ 3, 3, 3, 3, 3, 3, | |
1283 | /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, | |
1284 | /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
1285 | /* 31 */ 6 | |
1286 | }; | |
1287 | return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]); | |
1288 | } | |
1289 | ||
1290 | // If the magnitude of the value fits in less than 52 bits (the precision of | |
1291 | // an IEEE double precision floating point value), then we can use the | |
1292 | // libc sqrt function which will probably use a hardware sqrt computation. | |
1293 | // This should be faster than the algorithm below. | |
1294 | if (magnitude < 52) { | |
1295 | #if HAVE_ROUND | |
1296 | return APInt(BitWidth, | |
1297 | uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); | |
1298 | #else | |
1299 | return APInt(BitWidth, | |
1300 | uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5)); | |
1301 | #endif | |
1302 | } | |
1303 | ||
1304 | // Okay, all the short cuts are exhausted. We must compute it. The following | |
1305 | // is a classical Babylonian method for computing the square root. This code | |
1a4d82fc | 1306 | // was adapted to APInt from a wikipedia article on such computations. |
223e47cc LB |
1307 | // See http://www.wikipedia.org/ and go to the page named |
1308 | // Calculate_an_integer_square_root. | |
1309 | unsigned nbits = BitWidth, i = 4; | |
1310 | APInt testy(BitWidth, 16); | |
1311 | APInt x_old(BitWidth, 1); | |
1312 | APInt x_new(BitWidth, 0); | |
1313 | APInt two(BitWidth, 2); | |
1314 | ||
1315 | // Select a good starting value using binary logarithms. | |
1316 | for (;; i += 2, testy = testy.shl(2)) | |
1317 | if (i >= nbits || this->ule(testy)) { | |
1318 | x_old = x_old.shl(i / 2); | |
1319 | break; | |
1320 | } | |
1321 | ||
1322 | // Use the Babylonian method to arrive at the integer square root: | |
1323 | for (;;) { | |
1324 | x_new = (this->udiv(x_old) + x_old).udiv(two); | |
1325 | if (x_old.ule(x_new)) | |
1326 | break; | |
1327 | x_old = x_new; | |
1328 | } | |
1329 | ||
1330 | // Make sure we return the closest approximation | |
1331 | // NOTE: The rounding calculation below is correct. It will produce an | |
1332 | // off-by-one discrepancy with results from pari/gp. That discrepancy has been | |
1333 | // determined to be a rounding issue with pari/gp as it begins to use a | |
1334 | // floating point representation after 192 bits. There are no discrepancies | |
1335 | // between this algorithm and pari/gp for bit widths < 192 bits. | |
1336 | APInt square(x_old * x_old); | |
1337 | APInt nextSquare((x_old + 1) * (x_old +1)); | |
1338 | if (this->ult(square)) | |
1339 | return x_old; | |
1340 | assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); | |
1341 | APInt midpoint((nextSquare - square).udiv(two)); | |
1342 | APInt offset(*this - square); | |
1343 | if (offset.ult(midpoint)) | |
1344 | return x_old; | |
1345 | return x_old + 1; | |
1346 | } | |
1347 | ||
1348 | /// Computes the multiplicative inverse of this APInt for a given modulo. The | |
1349 | /// iterative extended Euclidean algorithm is used to solve for this value, | |
1350 | /// however we simplify it to speed up calculating only the inverse, and take | |
1351 | /// advantage of div+rem calculations. We also use some tricks to avoid copying | |
1352 | /// (potentially large) APInts around. | |
1353 | APInt APInt::multiplicativeInverse(const APInt& modulo) const { | |
1354 | assert(ult(modulo) && "This APInt must be smaller than the modulo"); | |
1355 | ||
1356 | // Using the properties listed at the following web page (accessed 06/21/08): | |
1357 | // http://www.numbertheory.org/php/euclid.html | |
1358 | // (especially the properties numbered 3, 4 and 9) it can be proved that | |
1359 | // BitWidth bits suffice for all the computations in the algorithm implemented | |
1360 | // below. More precisely, this number of bits suffice if the multiplicative | |
1361 | // inverse exists, but may not suffice for the general extended Euclidean | |
1362 | // algorithm. | |
1363 | ||
1364 | APInt r[2] = { modulo, *this }; | |
1365 | APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; | |
1366 | APInt q(BitWidth, 0); | |
1367 | ||
1368 | unsigned i; | |
1369 | for (i = 0; r[i^1] != 0; i ^= 1) { | |
1370 | // An overview of the math without the confusing bit-flipping: | |
1371 | // q = r[i-2] / r[i-1] | |
1372 | // r[i] = r[i-2] % r[i-1] | |
1373 | // t[i] = t[i-2] - t[i-1] * q | |
1374 | udivrem(r[i], r[i^1], q, r[i]); | |
1375 | t[i] -= t[i^1] * q; | |
1376 | } | |
1377 | ||
1378 | // If this APInt and the modulo are not coprime, there is no multiplicative | |
1379 | // inverse, so return 0. We check this by looking at the next-to-last | |
1380 | // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean | |
1381 | // algorithm. | |
1382 | if (r[i] != 1) | |
1383 | return APInt(BitWidth, 0); | |
1384 | ||
1385 | // The next-to-last t is the multiplicative inverse. However, we are | |
1386 | // interested in a positive inverse. Calcuate a positive one from a negative | |
1387 | // one if necessary. A simple addition of the modulo suffices because | |
1388 | // abs(t[i]) is known to be less than *this/2 (see the link above). | |
1389 | return t[i].isNegative() ? t[i] + modulo : t[i]; | |
1390 | } | |
1391 | ||
1392 | /// Calculate the magic numbers required to implement a signed integer division | |
1393 | /// by a constant as a sequence of multiplies, adds and shifts. Requires that | |
1394 | /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. | |
1395 | /// Warren, Jr., chapter 10. | |
1396 | APInt::ms APInt::magic() const { | |
1397 | const APInt& d = *this; | |
1398 | unsigned p; | |
1399 | APInt ad, anc, delta, q1, r1, q2, r2, t; | |
1400 | APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); | |
1401 | struct ms mag; | |
1402 | ||
1403 | ad = d.abs(); | |
1404 | t = signedMin + (d.lshr(d.getBitWidth() - 1)); | |
1405 | anc = t - 1 - t.urem(ad); // absolute value of nc | |
1406 | p = d.getBitWidth() - 1; // initialize p | |
1407 | q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) | |
1408 | r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) | |
1409 | q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) | |
1410 | r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) | |
1411 | do { | |
1412 | p = p + 1; | |
1413 | q1 = q1<<1; // update q1 = 2p/abs(nc) | |
1414 | r1 = r1<<1; // update r1 = rem(2p/abs(nc)) | |
1415 | if (r1.uge(anc)) { // must be unsigned comparison | |
1416 | q1 = q1 + 1; | |
1417 | r1 = r1 - anc; | |
1418 | } | |
1419 | q2 = q2<<1; // update q2 = 2p/abs(d) | |
1420 | r2 = r2<<1; // update r2 = rem(2p/abs(d)) | |
1421 | if (r2.uge(ad)) { // must be unsigned comparison | |
1422 | q2 = q2 + 1; | |
1423 | r2 = r2 - ad; | |
1424 | } | |
1425 | delta = ad - r2; | |
1426 | } while (q1.ult(delta) || (q1 == delta && r1 == 0)); | |
1427 | ||
1428 | mag.m = q2 + 1; | |
1429 | if (d.isNegative()) mag.m = -mag.m; // resulting magic number | |
1430 | mag.s = p - d.getBitWidth(); // resulting shift | |
1431 | return mag; | |
1432 | } | |
1433 | ||
1434 | /// Calculate the magic numbers required to implement an unsigned integer | |
1435 | /// division by a constant as a sequence of multiplies, adds and shifts. | |
1436 | /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry | |
1437 | /// S. Warren, Jr., chapter 10. | |
1438 | /// LeadingZeros can be used to simplify the calculation if the upper bits | |
1439 | /// of the divided value are known zero. | |
1440 | APInt::mu APInt::magicu(unsigned LeadingZeros) const { | |
1441 | const APInt& d = *this; | |
1442 | unsigned p; | |
1443 | APInt nc, delta, q1, r1, q2, r2; | |
1444 | struct mu magu; | |
1445 | magu.a = 0; // initialize "add" indicator | |
1446 | APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); | |
1447 | APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); | |
1448 | APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); | |
1449 | ||
1450 | nc = allOnes - (allOnes - d).urem(d); | |
1451 | p = d.getBitWidth() - 1; // initialize p | |
1452 | q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc | |
1453 | r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) | |
1454 | q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d | |
1455 | r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) | |
1456 | do { | |
1457 | p = p + 1; | |
1458 | if (r1.uge(nc - r1)) { | |
1459 | q1 = q1 + q1 + 1; // update q1 | |
1460 | r1 = r1 + r1 - nc; // update r1 | |
1461 | } | |
1462 | else { | |
1463 | q1 = q1+q1; // update q1 | |
1464 | r1 = r1+r1; // update r1 | |
1465 | } | |
1466 | if ((r2 + 1).uge(d - r2)) { | |
1467 | if (q2.uge(signedMax)) magu.a = 1; | |
1468 | q2 = q2+q2 + 1; // update q2 | |
1469 | r2 = r2+r2 + 1 - d; // update r2 | |
1470 | } | |
1471 | else { | |
1472 | if (q2.uge(signedMin)) magu.a = 1; | |
1473 | q2 = q2+q2; // update q2 | |
1474 | r2 = r2+r2 + 1; // update r2 | |
1475 | } | |
1476 | delta = d - 1 - r2; | |
1477 | } while (p < d.getBitWidth()*2 && | |
1478 | (q1.ult(delta) || (q1 == delta && r1 == 0))); | |
1479 | magu.m = q2 + 1; // resulting magic number | |
1480 | magu.s = p - d.getBitWidth(); // resulting shift | |
1481 | return magu; | |
1482 | } | |
1483 | ||
1484 | /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) | |
1485 | /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The | |
1486 | /// variables here have the same names as in the algorithm. Comments explain | |
1487 | /// the algorithm and any deviation from it. | |
1488 | static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, | |
1489 | unsigned m, unsigned n) { | |
1490 | assert(u && "Must provide dividend"); | |
1491 | assert(v && "Must provide divisor"); | |
1492 | assert(q && "Must provide quotient"); | |
1493 | assert(u != v && u != q && v != q && "Must us different memory"); | |
1494 | assert(n>1 && "n must be > 1"); | |
1495 | ||
1496 | // Knuth uses the value b as the base of the number system. In our case b | |
1497 | // is 2^31 so we just set it to -1u. | |
1498 | uint64_t b = uint64_t(1) << 32; | |
1499 | ||
1500 | #if 0 | |
1501 | DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); | |
1502 | DEBUG(dbgs() << "KnuthDiv: original:"); | |
1503 | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); | |
1504 | DEBUG(dbgs() << " by"); | |
1505 | DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); | |
1506 | DEBUG(dbgs() << '\n'); | |
1507 | #endif | |
1508 | // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of | |
1509 | // u and v by d. Note that we have taken Knuth's advice here to use a power | |
1510 | // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of | |
1511 | // 2 allows us to shift instead of multiply and it is easy to determine the | |
1512 | // shift amount from the leading zeros. We are basically normalizing the u | |
1513 | // and v so that its high bits are shifted to the top of v's range without | |
1514 | // overflow. Note that this can require an extra word in u so that u must | |
1515 | // be of length m+n+1. | |
1a4d82fc | 1516 | unsigned shift = countLeadingZeros(v[n-1]); |
223e47cc LB |
1517 | unsigned v_carry = 0; |
1518 | unsigned u_carry = 0; | |
1519 | if (shift) { | |
1520 | for (unsigned i = 0; i < m+n; ++i) { | |
1521 | unsigned u_tmp = u[i] >> (32 - shift); | |
1522 | u[i] = (u[i] << shift) | u_carry; | |
1523 | u_carry = u_tmp; | |
1524 | } | |
1525 | for (unsigned i = 0; i < n; ++i) { | |
1526 | unsigned v_tmp = v[i] >> (32 - shift); | |
1527 | v[i] = (v[i] << shift) | v_carry; | |
1528 | v_carry = v_tmp; | |
1529 | } | |
1530 | } | |
1531 | u[m+n] = u_carry; | |
1532 | #if 0 | |
1533 | DEBUG(dbgs() << "KnuthDiv: normal:"); | |
1534 | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); | |
1535 | DEBUG(dbgs() << " by"); | |
1536 | DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); | |
1537 | DEBUG(dbgs() << '\n'); | |
1538 | #endif | |
1539 | ||
1540 | // D2. [Initialize j.] Set j to m. This is the loop counter over the places. | |
1541 | int j = m; | |
1542 | do { | |
1543 | DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); | |
1544 | // D3. [Calculate q'.]. | |
1545 | // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') | |
1546 | // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') | |
1547 | // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease | |
1548 | // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test | |
1549 | // on v[n-2] determines at high speed most of the cases in which the trial | |
1550 | // value qp is one too large, and it eliminates all cases where qp is two | |
1551 | // too large. | |
1552 | uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); | |
1553 | DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); | |
1554 | uint64_t qp = dividend / v[n-1]; | |
1555 | uint64_t rp = dividend % v[n-1]; | |
1556 | if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { | |
1557 | qp--; | |
1558 | rp += v[n-1]; | |
1559 | if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) | |
1560 | qp--; | |
1561 | } | |
1562 | DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); | |
1563 | ||
1564 | // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with | |
1565 | // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation | |
1566 | // consists of a simple multiplication by a one-place number, combined with | |
1567 | // a subtraction. | |
1568 | bool isNeg = false; | |
1569 | for (unsigned i = 0; i < n; ++i) { | |
1570 | uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); | |
1571 | uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); | |
1572 | bool borrow = subtrahend > u_tmp; | |
1573 | DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp | |
1574 | << ", subtrahend == " << subtrahend | |
1575 | << ", borrow = " << borrow << '\n'); | |
1576 | ||
1577 | uint64_t result = u_tmp - subtrahend; | |
1578 | unsigned k = j + i; | |
1579 | u[k++] = (unsigned)(result & (b-1)); // subtract low word | |
1580 | u[k++] = (unsigned)(result >> 32); // subtract high word | |
1581 | while (borrow && k <= m+n) { // deal with borrow to the left | |
1582 | borrow = u[k] == 0; | |
1583 | u[k]--; | |
1584 | k++; | |
1585 | } | |
1586 | isNeg |= borrow; | |
1587 | DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << | |
1588 | u[j+i+1] << '\n'); | |
1589 | } | |
1590 | DEBUG(dbgs() << "KnuthDiv: after subtraction:"); | |
1591 | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); | |
1592 | DEBUG(dbgs() << '\n'); | |
1593 | // The digits (u[j+n]...u[j]) should be kept positive; if the result of | |
1594 | // this step is actually negative, (u[j+n]...u[j]) should be left as the | |
1595 | // true value plus b**(n+1), namely as the b's complement of | |
1596 | // the true value, and a "borrow" to the left should be remembered. | |
1597 | // | |
1598 | if (isNeg) { | |
1599 | bool carry = true; // true because b's complement is "complement + 1" | |
1600 | for (unsigned i = 0; i <= m+n; ++i) { | |
1601 | u[i] = ~u[i] + carry; // b's complement | |
1602 | carry = carry && u[i] == 0; | |
1603 | } | |
1604 | } | |
1605 | DEBUG(dbgs() << "KnuthDiv: after complement:"); | |
1606 | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); | |
1607 | DEBUG(dbgs() << '\n'); | |
1608 | ||
1609 | // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was | |
1610 | // negative, go to step D6; otherwise go on to step D7. | |
1611 | q[j] = (unsigned)qp; | |
1612 | if (isNeg) { | |
1613 | // D6. [Add back]. The probability that this step is necessary is very | |
1614 | // small, on the order of only 2/b. Make sure that test data accounts for | |
1615 | // this possibility. Decrease q[j] by 1 | |
1616 | q[j]--; | |
1617 | // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). | |
1618 | // A carry will occur to the left of u[j+n], and it should be ignored | |
1619 | // since it cancels with the borrow that occurred in D4. | |
1620 | bool carry = false; | |
1621 | for (unsigned i = 0; i < n; i++) { | |
1622 | unsigned limit = std::min(u[j+i],v[i]); | |
1623 | u[j+i] += v[i] + carry; | |
1624 | carry = u[j+i] < limit || (carry && u[j+i] == limit); | |
1625 | } | |
1626 | u[j+n] += carry; | |
1627 | } | |
1628 | DEBUG(dbgs() << "KnuthDiv: after correction:"); | |
1629 | DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]); | |
1630 | DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); | |
1631 | ||
1632 | // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. | |
1633 | } while (--j >= 0); | |
1634 | ||
1635 | DEBUG(dbgs() << "KnuthDiv: quotient:"); | |
1636 | DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]); | |
1637 | DEBUG(dbgs() << '\n'); | |
1638 | ||
1639 | // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired | |
1640 | // remainder may be obtained by dividing u[...] by d. If r is non-null we | |
1641 | // compute the remainder (urem uses this). | |
1642 | if (r) { | |
1643 | // The value d is expressed by the "shift" value above since we avoided | |
1644 | // multiplication by d by using a shift left. So, all we have to do is | |
1645 | // shift right here. In order to mak | |
1646 | if (shift) { | |
1647 | unsigned carry = 0; | |
1648 | DEBUG(dbgs() << "KnuthDiv: remainder:"); | |
1649 | for (int i = n-1; i >= 0; i--) { | |
1650 | r[i] = (u[i] >> shift) | carry; | |
1651 | carry = u[i] << (32 - shift); | |
1652 | DEBUG(dbgs() << " " << r[i]); | |
1653 | } | |
1654 | } else { | |
1655 | for (int i = n-1; i >= 0; i--) { | |
1656 | r[i] = u[i]; | |
1657 | DEBUG(dbgs() << " " << r[i]); | |
1658 | } | |
1659 | } | |
1660 | DEBUG(dbgs() << '\n'); | |
1661 | } | |
1662 | #if 0 | |
1663 | DEBUG(dbgs() << '\n'); | |
1664 | #endif | |
1665 | } | |
1666 | ||
1667 | void APInt::divide(const APInt LHS, unsigned lhsWords, | |
1668 | const APInt &RHS, unsigned rhsWords, | |
1669 | APInt *Quotient, APInt *Remainder) | |
1670 | { | |
1671 | assert(lhsWords >= rhsWords && "Fractional result"); | |
1672 | ||
1673 | // First, compose the values into an array of 32-bit words instead of | |
1674 | // 64-bit words. This is a necessity of both the "short division" algorithm | |
1675 | // and the Knuth "classical algorithm" which requires there to be native | |
1676 | // operations for +, -, and * on an m bit value with an m*2 bit result. We | |
1677 | // can't use 64-bit operands here because we don't have native results of | |
1678 | // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't | |
1679 | // work on large-endian machines. | |
1680 | uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); | |
1681 | unsigned n = rhsWords * 2; | |
1682 | unsigned m = (lhsWords * 2) - n; | |
1683 | ||
1684 | // Allocate space for the temporary values we need either on the stack, if | |
1685 | // it will fit, or on the heap if it won't. | |
1686 | unsigned SPACE[128]; | |
1a4d82fc JJ |
1687 | unsigned *U = nullptr; |
1688 | unsigned *V = nullptr; | |
1689 | unsigned *Q = nullptr; | |
1690 | unsigned *R = nullptr; | |
223e47cc LB |
1691 | if ((Remainder?4:3)*n+2*m+1 <= 128) { |
1692 | U = &SPACE[0]; | |
1693 | V = &SPACE[m+n+1]; | |
1694 | Q = &SPACE[(m+n+1) + n]; | |
1695 | if (Remainder) | |
1696 | R = &SPACE[(m+n+1) + n + (m+n)]; | |
1697 | } else { | |
1698 | U = new unsigned[m + n + 1]; | |
1699 | V = new unsigned[n]; | |
1700 | Q = new unsigned[m+n]; | |
1701 | if (Remainder) | |
1702 | R = new unsigned[n]; | |
1703 | } | |
1704 | ||
1705 | // Initialize the dividend | |
1706 | memset(U, 0, (m+n+1)*sizeof(unsigned)); | |
1707 | for (unsigned i = 0; i < lhsWords; ++i) { | |
1708 | uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); | |
1709 | U[i * 2] = (unsigned)(tmp & mask); | |
1710 | U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); | |
1711 | } | |
1712 | U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. | |
1713 | ||
1714 | // Initialize the divisor | |
1715 | memset(V, 0, (n)*sizeof(unsigned)); | |
1716 | for (unsigned i = 0; i < rhsWords; ++i) { | |
1717 | uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); | |
1718 | V[i * 2] = (unsigned)(tmp & mask); | |
1719 | V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); | |
1720 | } | |
1721 | ||
1722 | // initialize the quotient and remainder | |
1723 | memset(Q, 0, (m+n) * sizeof(unsigned)); | |
1724 | if (Remainder) | |
1725 | memset(R, 0, n * sizeof(unsigned)); | |
1726 | ||
1727 | // Now, adjust m and n for the Knuth division. n is the number of words in | |
1728 | // the divisor. m is the number of words by which the dividend exceeds the | |
1729 | // divisor (i.e. m+n is the length of the dividend). These sizes must not | |
1730 | // contain any zero words or the Knuth algorithm fails. | |
1731 | for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { | |
1732 | n--; | |
1733 | m++; | |
1734 | } | |
1735 | for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) | |
1736 | m--; | |
1737 | ||
1738 | // If we're left with only a single word for the divisor, Knuth doesn't work | |
1739 | // so we implement the short division algorithm here. This is much simpler | |
1740 | // and faster because we are certain that we can divide a 64-bit quantity | |
1741 | // by a 32-bit quantity at hardware speed and short division is simply a | |
1742 | // series of such operations. This is just like doing short division but we | |
1743 | // are using base 2^32 instead of base 10. | |
1744 | assert(n != 0 && "Divide by zero?"); | |
1745 | if (n == 1) { | |
1746 | unsigned divisor = V[0]; | |
1747 | unsigned remainder = 0; | |
1748 | for (int i = m+n-1; i >= 0; i--) { | |
1749 | uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i]; | |
1750 | if (partial_dividend == 0) { | |
1751 | Q[i] = 0; | |
1752 | remainder = 0; | |
1753 | } else if (partial_dividend < divisor) { | |
1754 | Q[i] = 0; | |
1755 | remainder = (unsigned)partial_dividend; | |
1756 | } else if (partial_dividend == divisor) { | |
1757 | Q[i] = 1; | |
1758 | remainder = 0; | |
1759 | } else { | |
1760 | Q[i] = (unsigned)(partial_dividend / divisor); | |
1761 | remainder = (unsigned)(partial_dividend - (Q[i] * divisor)); | |
1762 | } | |
1763 | } | |
1764 | if (R) | |
1765 | R[0] = remainder; | |
1766 | } else { | |
1767 | // Now we're ready to invoke the Knuth classical divide algorithm. In this | |
1768 | // case n > 1. | |
1769 | KnuthDiv(U, V, Q, R, m, n); | |
1770 | } | |
1771 | ||
1772 | // If the caller wants the quotient | |
1773 | if (Quotient) { | |
1774 | // Set up the Quotient value's memory. | |
1775 | if (Quotient->BitWidth != LHS.BitWidth) { | |
1776 | if (Quotient->isSingleWord()) | |
1777 | Quotient->VAL = 0; | |
1778 | else | |
1779 | delete [] Quotient->pVal; | |
1780 | Quotient->BitWidth = LHS.BitWidth; | |
1781 | if (!Quotient->isSingleWord()) | |
1782 | Quotient->pVal = getClearedMemory(Quotient->getNumWords()); | |
1783 | } else | |
1784 | Quotient->clearAllBits(); | |
1785 | ||
1786 | // The quotient is in Q. Reconstitute the quotient into Quotient's low | |
1787 | // order words. | |
1788 | if (lhsWords == 1) { | |
1789 | uint64_t tmp = | |
1790 | uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); | |
1791 | if (Quotient->isSingleWord()) | |
1792 | Quotient->VAL = tmp; | |
1793 | else | |
1794 | Quotient->pVal[0] = tmp; | |
1795 | } else { | |
1796 | assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); | |
1797 | for (unsigned i = 0; i < lhsWords; ++i) | |
1798 | Quotient->pVal[i] = | |
1799 | uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); | |
1800 | } | |
1801 | } | |
1802 | ||
1803 | // If the caller wants the remainder | |
1804 | if (Remainder) { | |
1805 | // Set up the Remainder value's memory. | |
1806 | if (Remainder->BitWidth != RHS.BitWidth) { | |
1807 | if (Remainder->isSingleWord()) | |
1808 | Remainder->VAL = 0; | |
1809 | else | |
1810 | delete [] Remainder->pVal; | |
1811 | Remainder->BitWidth = RHS.BitWidth; | |
1812 | if (!Remainder->isSingleWord()) | |
1813 | Remainder->pVal = getClearedMemory(Remainder->getNumWords()); | |
1814 | } else | |
1815 | Remainder->clearAllBits(); | |
1816 | ||
1817 | // The remainder is in R. Reconstitute the remainder into Remainder's low | |
1818 | // order words. | |
1819 | if (rhsWords == 1) { | |
1820 | uint64_t tmp = | |
1821 | uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); | |
1822 | if (Remainder->isSingleWord()) | |
1823 | Remainder->VAL = tmp; | |
1824 | else | |
1825 | Remainder->pVal[0] = tmp; | |
1826 | } else { | |
1827 | assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); | |
1828 | for (unsigned i = 0; i < rhsWords; ++i) | |
1829 | Remainder->pVal[i] = | |
1830 | uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); | |
1831 | } | |
1832 | } | |
1833 | ||
1834 | // Clean up the memory we allocated. | |
1835 | if (U != &SPACE[0]) { | |
1836 | delete [] U; | |
1837 | delete [] V; | |
1838 | delete [] Q; | |
1839 | delete [] R; | |
1840 | } | |
1841 | } | |
1842 | ||
1843 | APInt APInt::udiv(const APInt& RHS) const { | |
1844 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
1845 | ||
1846 | // First, deal with the easy case | |
1847 | if (isSingleWord()) { | |
1848 | assert(RHS.VAL != 0 && "Divide by zero?"); | |
1849 | return APInt(BitWidth, VAL / RHS.VAL); | |
1850 | } | |
1851 | ||
1852 | // Get some facts about the LHS and RHS number of bits and words | |
1853 | unsigned rhsBits = RHS.getActiveBits(); | |
1854 | unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); | |
1855 | assert(rhsWords && "Divided by zero???"); | |
1856 | unsigned lhsBits = this->getActiveBits(); | |
1857 | unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); | |
1858 | ||
1859 | // Deal with some degenerate cases | |
1860 | if (!lhsWords) | |
1861 | // 0 / X ===> 0 | |
1862 | return APInt(BitWidth, 0); | |
1863 | else if (lhsWords < rhsWords || this->ult(RHS)) { | |
1864 | // X / Y ===> 0, iff X < Y | |
1865 | return APInt(BitWidth, 0); | |
1866 | } else if (*this == RHS) { | |
1867 | // X / X ===> 1 | |
1868 | return APInt(BitWidth, 1); | |
1869 | } else if (lhsWords == 1 && rhsWords == 1) { | |
1870 | // All high words are zero, just use native divide | |
1871 | return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); | |
1872 | } | |
1873 | ||
1874 | // We have to compute it the hard way. Invoke the Knuth divide algorithm. | |
1875 | APInt Quotient(1,0); // to hold result. | |
1a4d82fc | 1876 | divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr); |
223e47cc LB |
1877 | return Quotient; |
1878 | } | |
1879 | ||
970d7e83 LB |
1880 | APInt APInt::sdiv(const APInt &RHS) const { |
1881 | if (isNegative()) { | |
1882 | if (RHS.isNegative()) | |
1883 | return (-(*this)).udiv(-RHS); | |
1884 | return -((-(*this)).udiv(RHS)); | |
1885 | } | |
1886 | if (RHS.isNegative()) | |
1887 | return -(this->udiv(-RHS)); | |
1888 | return this->udiv(RHS); | |
1889 | } | |
1890 | ||
223e47cc LB |
1891 | APInt APInt::urem(const APInt& RHS) const { |
1892 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); | |
1893 | if (isSingleWord()) { | |
1894 | assert(RHS.VAL != 0 && "Remainder by zero?"); | |
1895 | return APInt(BitWidth, VAL % RHS.VAL); | |
1896 | } | |
1897 | ||
1898 | // Get some facts about the LHS | |
1899 | unsigned lhsBits = getActiveBits(); | |
1900 | unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); | |
1901 | ||
1902 | // Get some facts about the RHS | |
1903 | unsigned rhsBits = RHS.getActiveBits(); | |
1904 | unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); | |
1905 | assert(rhsWords && "Performing remainder operation by zero ???"); | |
1906 | ||
1907 | // Check the degenerate cases | |
1908 | if (lhsWords == 0) { | |
1909 | // 0 % Y ===> 0 | |
1910 | return APInt(BitWidth, 0); | |
1911 | } else if (lhsWords < rhsWords || this->ult(RHS)) { | |
1912 | // X % Y ===> X, iff X < Y | |
1913 | return *this; | |
1914 | } else if (*this == RHS) { | |
1915 | // X % X == 0; | |
1916 | return APInt(BitWidth, 0); | |
1917 | } else if (lhsWords == 1) { | |
1918 | // All high words are zero, just use native remainder | |
1919 | return APInt(BitWidth, pVal[0] % RHS.pVal[0]); | |
1920 | } | |
1921 | ||
1922 | // We have to compute it the hard way. Invoke the Knuth divide algorithm. | |
1923 | APInt Remainder(1,0); | |
1a4d82fc | 1924 | divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder); |
223e47cc LB |
1925 | return Remainder; |
1926 | } | |
1927 | ||
970d7e83 LB |
1928 | APInt APInt::srem(const APInt &RHS) const { |
1929 | if (isNegative()) { | |
1930 | if (RHS.isNegative()) | |
1931 | return -((-(*this)).urem(-RHS)); | |
1932 | return -((-(*this)).urem(RHS)); | |
1933 | } | |
1934 | if (RHS.isNegative()) | |
1935 | return this->urem(-RHS); | |
1936 | return this->urem(RHS); | |
1937 | } | |
1938 | ||
223e47cc LB |
1939 | void APInt::udivrem(const APInt &LHS, const APInt &RHS, |
1940 | APInt &Quotient, APInt &Remainder) { | |
1941 | // Get some size facts about the dividend and divisor | |
1942 | unsigned lhsBits = LHS.getActiveBits(); | |
1943 | unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); | |
1944 | unsigned rhsBits = RHS.getActiveBits(); | |
1945 | unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); | |
1946 | ||
1947 | // Check the degenerate cases | |
1948 | if (lhsWords == 0) { | |
1949 | Quotient = 0; // 0 / Y ===> 0 | |
1950 | Remainder = 0; // 0 % Y ===> 0 | |
1951 | return; | |
1952 | } | |
1953 | ||
1954 | if (lhsWords < rhsWords || LHS.ult(RHS)) { | |
1955 | Remainder = LHS; // X % Y ===> X, iff X < Y | |
1956 | Quotient = 0; // X / Y ===> 0, iff X < Y | |
1957 | return; | |
1958 | } | |
1959 | ||
1960 | if (LHS == RHS) { | |
1961 | Quotient = 1; // X / X ===> 1 | |
1962 | Remainder = 0; // X % X ===> 0; | |
1963 | return; | |
1964 | } | |
1965 | ||
1966 | if (lhsWords == 1 && rhsWords == 1) { | |
1967 | // There is only one word to consider so use the native versions. | |
1968 | uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; | |
1969 | uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0]; | |
1970 | Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); | |
1971 | Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); | |
1972 | return; | |
1973 | } | |
1974 | ||
1975 | // Okay, lets do it the long way | |
1976 | divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); | |
1977 | } | |
1978 | ||
970d7e83 LB |
1979 | void APInt::sdivrem(const APInt &LHS, const APInt &RHS, |
1980 | APInt &Quotient, APInt &Remainder) { | |
1981 | if (LHS.isNegative()) { | |
1982 | if (RHS.isNegative()) | |
1983 | APInt::udivrem(-LHS, -RHS, Quotient, Remainder); | |
1984 | else { | |
1985 | APInt::udivrem(-LHS, RHS, Quotient, Remainder); | |
1986 | Quotient = -Quotient; | |
1987 | } | |
1988 | Remainder = -Remainder; | |
1989 | } else if (RHS.isNegative()) { | |
1990 | APInt::udivrem(LHS, -RHS, Quotient, Remainder); | |
1991 | Quotient = -Quotient; | |
1992 | } else { | |
1993 | APInt::udivrem(LHS, RHS, Quotient, Remainder); | |
1994 | } | |
1995 | } | |
1996 | ||
223e47cc LB |
1997 | APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { |
1998 | APInt Res = *this+RHS; | |
1999 | Overflow = isNonNegative() == RHS.isNonNegative() && | |
2000 | Res.isNonNegative() != isNonNegative(); | |
2001 | return Res; | |
2002 | } | |
2003 | ||
2004 | APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { | |
2005 | APInt Res = *this+RHS; | |
2006 | Overflow = Res.ult(RHS); | |
2007 | return Res; | |
2008 | } | |
2009 | ||
2010 | APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { | |
2011 | APInt Res = *this - RHS; | |
2012 | Overflow = isNonNegative() != RHS.isNonNegative() && | |
2013 | Res.isNonNegative() != isNonNegative(); | |
2014 | return Res; | |
2015 | } | |
2016 | ||
2017 | APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { | |
2018 | APInt Res = *this-RHS; | |
2019 | Overflow = Res.ugt(*this); | |
2020 | return Res; | |
2021 | } | |
2022 | ||
2023 | APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { | |
2024 | // MININT/-1 --> overflow. | |
2025 | Overflow = isMinSignedValue() && RHS.isAllOnesValue(); | |
2026 | return sdiv(RHS); | |
2027 | } | |
2028 | ||
2029 | APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { | |
2030 | APInt Res = *this * RHS; | |
2031 | ||
2032 | if (*this != 0 && RHS != 0) | |
2033 | Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; | |
2034 | else | |
2035 | Overflow = false; | |
2036 | return Res; | |
2037 | } | |
2038 | ||
2039 | APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { | |
2040 | APInt Res = *this * RHS; | |
2041 | ||
2042 | if (*this != 0 && RHS != 0) | |
2043 | Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; | |
2044 | else | |
2045 | Overflow = false; | |
2046 | return Res; | |
2047 | } | |
2048 | ||
2049 | APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { | |
2050 | Overflow = ShAmt >= getBitWidth(); | |
2051 | if (Overflow) | |
2052 | ShAmt = getBitWidth()-1; | |
2053 | ||
2054 | if (isNonNegative()) // Don't allow sign change. | |
2055 | Overflow = ShAmt >= countLeadingZeros(); | |
2056 | else | |
2057 | Overflow = ShAmt >= countLeadingOnes(); | |
2058 | ||
2059 | return *this << ShAmt; | |
2060 | } | |
2061 | ||
2062 | ||
2063 | ||
2064 | ||
2065 | void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { | |
2066 | // Check our assumptions here | |
2067 | assert(!str.empty() && "Invalid string length"); | |
2068 | assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || | |
2069 | radix == 36) && | |
2070 | "Radix should be 2, 8, 10, 16, or 36!"); | |
2071 | ||
2072 | StringRef::iterator p = str.begin(); | |
2073 | size_t slen = str.size(); | |
2074 | bool isNeg = *p == '-'; | |
2075 | if (*p == '-' || *p == '+') { | |
2076 | p++; | |
2077 | slen--; | |
2078 | assert(slen && "String is only a sign, needs a value."); | |
2079 | } | |
2080 | assert((slen <= numbits || radix != 2) && "Insufficient bit width"); | |
2081 | assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); | |
2082 | assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); | |
2083 | assert((((slen-1)*64)/22 <= numbits || radix != 10) && | |
2084 | "Insufficient bit width"); | |
2085 | ||
2086 | // Allocate memory | |
2087 | if (!isSingleWord()) | |
2088 | pVal = getClearedMemory(getNumWords()); | |
2089 | ||
2090 | // Figure out if we can shift instead of multiply | |
2091 | unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); | |
2092 | ||
2093 | // Set up an APInt for the digit to add outside the loop so we don't | |
2094 | // constantly construct/destruct it. | |
2095 | APInt apdigit(getBitWidth(), 0); | |
2096 | APInt apradix(getBitWidth(), radix); | |
2097 | ||
2098 | // Enter digit traversal loop | |
2099 | for (StringRef::iterator e = str.end(); p != e; ++p) { | |
2100 | unsigned digit = getDigit(*p, radix); | |
2101 | assert(digit < radix && "Invalid character in digit string"); | |
2102 | ||
2103 | // Shift or multiply the value by the radix | |
2104 | if (slen > 1) { | |
2105 | if (shift) | |
2106 | *this <<= shift; | |
2107 | else | |
2108 | *this *= apradix; | |
2109 | } | |
2110 | ||
2111 | // Add in the digit we just interpreted | |
2112 | if (apdigit.isSingleWord()) | |
2113 | apdigit.VAL = digit; | |
2114 | else | |
2115 | apdigit.pVal[0] = digit; | |
2116 | *this += apdigit; | |
2117 | } | |
2118 | // If its negative, put it in two's complement form | |
2119 | if (isNeg) { | |
1a4d82fc | 2120 | --(*this); |
223e47cc LB |
2121 | this->flipAllBits(); |
2122 | } | |
2123 | } | |
2124 | ||
2125 | void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, | |
2126 | bool Signed, bool formatAsCLiteral) const { | |
2127 | assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || | |
2128 | Radix == 36) && | |
2129 | "Radix should be 2, 8, 10, 16, or 36!"); | |
2130 | ||
2131 | const char *Prefix = ""; | |
2132 | if (formatAsCLiteral) { | |
2133 | switch (Radix) { | |
2134 | case 2: | |
2135 | // Binary literals are a non-standard extension added in gcc 4.3: | |
2136 | // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html | |
2137 | Prefix = "0b"; | |
2138 | break; | |
2139 | case 8: | |
2140 | Prefix = "0"; | |
2141 | break; | |
2142 | case 10: | |
2143 | break; // No prefix | |
2144 | case 16: | |
2145 | Prefix = "0x"; | |
2146 | break; | |
2147 | default: | |
2148 | llvm_unreachable("Invalid radix!"); | |
2149 | } | |
2150 | } | |
2151 | ||
2152 | // First, check for a zero value and just short circuit the logic below. | |
2153 | if (*this == 0) { | |
2154 | while (*Prefix) { | |
2155 | Str.push_back(*Prefix); | |
2156 | ++Prefix; | |
2157 | }; | |
2158 | Str.push_back('0'); | |
2159 | return; | |
2160 | } | |
2161 | ||
2162 | static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |
2163 | ||
2164 | if (isSingleWord()) { | |
2165 | char Buffer[65]; | |
2166 | char *BufPtr = Buffer+65; | |
2167 | ||
2168 | uint64_t N; | |
2169 | if (!Signed) { | |
2170 | N = getZExtValue(); | |
2171 | } else { | |
2172 | int64_t I = getSExtValue(); | |
2173 | if (I >= 0) { | |
2174 | N = I; | |
2175 | } else { | |
2176 | Str.push_back('-'); | |
2177 | N = -(uint64_t)I; | |
2178 | } | |
2179 | } | |
2180 | ||
2181 | while (*Prefix) { | |
2182 | Str.push_back(*Prefix); | |
2183 | ++Prefix; | |
2184 | }; | |
2185 | ||
2186 | while (N) { | |
2187 | *--BufPtr = Digits[N % Radix]; | |
2188 | N /= Radix; | |
2189 | } | |
2190 | Str.append(BufPtr, Buffer+65); | |
2191 | return; | |
2192 | } | |
2193 | ||
2194 | APInt Tmp(*this); | |
2195 | ||
2196 | if (Signed && isNegative()) { | |
2197 | // They want to print the signed version and it is a negative value | |
2198 | // Flip the bits and add one to turn it into the equivalent positive | |
2199 | // value and put a '-' in the result. | |
2200 | Tmp.flipAllBits(); | |
1a4d82fc | 2201 | ++Tmp; |
223e47cc LB |
2202 | Str.push_back('-'); |
2203 | } | |
2204 | ||
2205 | while (*Prefix) { | |
2206 | Str.push_back(*Prefix); | |
2207 | ++Prefix; | |
2208 | }; | |
2209 | ||
2210 | // We insert the digits backward, then reverse them to get the right order. | |
2211 | unsigned StartDig = Str.size(); | |
2212 | ||
2213 | // For the 2, 8 and 16 bit cases, we can just shift instead of divide | |
2214 | // because the number of bits per digit (1, 3 and 4 respectively) divides | |
2215 | // equaly. We just shift until the value is zero. | |
2216 | if (Radix == 2 || Radix == 8 || Radix == 16) { | |
2217 | // Just shift tmp right for each digit width until it becomes zero | |
2218 | unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); | |
2219 | unsigned MaskAmt = Radix - 1; | |
2220 | ||
2221 | while (Tmp != 0) { | |
2222 | unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; | |
2223 | Str.push_back(Digits[Digit]); | |
2224 | Tmp = Tmp.lshr(ShiftAmt); | |
2225 | } | |
2226 | } else { | |
2227 | APInt divisor(Radix == 10? 4 : 8, Radix); | |
2228 | while (Tmp != 0) { | |
2229 | APInt APdigit(1, 0); | |
2230 | APInt tmp2(Tmp.getBitWidth(), 0); | |
2231 | divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, | |
2232 | &APdigit); | |
2233 | unsigned Digit = (unsigned)APdigit.getZExtValue(); | |
2234 | assert(Digit < Radix && "divide failed"); | |
2235 | Str.push_back(Digits[Digit]); | |
2236 | Tmp = tmp2; | |
2237 | } | |
2238 | } | |
2239 | ||
2240 | // Reverse the digits before returning. | |
2241 | std::reverse(Str.begin()+StartDig, Str.end()); | |
2242 | } | |
2243 | ||
2244 | /// toString - This returns the APInt as a std::string. Note that this is an | |
2245 | /// inefficient method. It is better to pass in a SmallVector/SmallString | |
2246 | /// to the methods above. | |
2247 | std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { | |
2248 | SmallString<40> S; | |
2249 | toString(S, Radix, Signed, /* formatAsCLiteral = */false); | |
2250 | return S.str(); | |
2251 | } | |
2252 | ||
2253 | ||
2254 | void APInt::dump() const { | |
2255 | SmallString<40> S, U; | |
2256 | this->toStringUnsigned(U); | |
2257 | this->toStringSigned(S); | |
2258 | dbgs() << "APInt(" << BitWidth << "b, " | |
2259 | << U.str() << "u " << S.str() << "s)"; | |
2260 | } | |
2261 | ||
2262 | void APInt::print(raw_ostream &OS, bool isSigned) const { | |
2263 | SmallString<40> S; | |
2264 | this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); | |
2265 | OS << S.str(); | |
2266 | } | |
2267 | ||
2268 | // This implements a variety of operations on a representation of | |
2269 | // arbitrary precision, two's-complement, bignum integer values. | |
2270 | ||
2271 | // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe | |
2272 | // and unrestricting assumption. | |
2273 | #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] | |
2274 | COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0); | |
2275 | ||
2276 | /* Some handy functions local to this file. */ | |
2277 | namespace { | |
2278 | ||
2279 | /* Returns the integer part with the least significant BITS set. | |
2280 | BITS cannot be zero. */ | |
2281 | static inline integerPart | |
2282 | lowBitMask(unsigned int bits) | |
2283 | { | |
2284 | assert(bits != 0 && bits <= integerPartWidth); | |
2285 | ||
2286 | return ~(integerPart) 0 >> (integerPartWidth - bits); | |
2287 | } | |
2288 | ||
2289 | /* Returns the value of the lower half of PART. */ | |
2290 | static inline integerPart | |
2291 | lowHalf(integerPart part) | |
2292 | { | |
2293 | return part & lowBitMask(integerPartWidth / 2); | |
2294 | } | |
2295 | ||
2296 | /* Returns the value of the upper half of PART. */ | |
2297 | static inline integerPart | |
2298 | highHalf(integerPart part) | |
2299 | { | |
2300 | return part >> (integerPartWidth / 2); | |
2301 | } | |
2302 | ||
2303 | /* Returns the bit number of the most significant set bit of a part. | |
2304 | If the input number has no bits set -1U is returned. */ | |
2305 | static unsigned int | |
2306 | partMSB(integerPart value) | |
2307 | { | |
1a4d82fc | 2308 | return findLastSet(value, ZB_Max); |
223e47cc LB |
2309 | } |
2310 | ||
2311 | /* Returns the bit number of the least significant set bit of a | |
2312 | part. If the input number has no bits set -1U is returned. */ | |
2313 | static unsigned int | |
2314 | partLSB(integerPart value) | |
2315 | { | |
1a4d82fc | 2316 | return findFirstSet(value, ZB_Max); |
223e47cc LB |
2317 | } |
2318 | } | |
2319 | ||
2320 | /* Sets the least significant part of a bignum to the input value, and | |
2321 | zeroes out higher parts. */ | |
2322 | void | |
2323 | APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts) | |
2324 | { | |
2325 | unsigned int i; | |
2326 | ||
2327 | assert(parts > 0); | |
2328 | ||
2329 | dst[0] = part; | |
2330 | for (i = 1; i < parts; i++) | |
2331 | dst[i] = 0; | |
2332 | } | |
2333 | ||
2334 | /* Assign one bignum to another. */ | |
2335 | void | |
2336 | APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts) | |
2337 | { | |
2338 | unsigned int i; | |
2339 | ||
2340 | for (i = 0; i < parts; i++) | |
2341 | dst[i] = src[i]; | |
2342 | } | |
2343 | ||
2344 | /* Returns true if a bignum is zero, false otherwise. */ | |
2345 | bool | |
2346 | APInt::tcIsZero(const integerPart *src, unsigned int parts) | |
2347 | { | |
2348 | unsigned int i; | |
2349 | ||
2350 | for (i = 0; i < parts; i++) | |
2351 | if (src[i]) | |
2352 | return false; | |
2353 | ||
2354 | return true; | |
2355 | } | |
2356 | ||
2357 | /* Extract the given bit of a bignum; returns 0 or 1. */ | |
2358 | int | |
2359 | APInt::tcExtractBit(const integerPart *parts, unsigned int bit) | |
2360 | { | |
2361 | return (parts[bit / integerPartWidth] & | |
2362 | ((integerPart) 1 << bit % integerPartWidth)) != 0; | |
2363 | } | |
2364 | ||
2365 | /* Set the given bit of a bignum. */ | |
2366 | void | |
2367 | APInt::tcSetBit(integerPart *parts, unsigned int bit) | |
2368 | { | |
2369 | parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth); | |
2370 | } | |
2371 | ||
2372 | /* Clears the given bit of a bignum. */ | |
2373 | void | |
2374 | APInt::tcClearBit(integerPart *parts, unsigned int bit) | |
2375 | { | |
2376 | parts[bit / integerPartWidth] &= | |
2377 | ~((integerPart) 1 << (bit % integerPartWidth)); | |
2378 | } | |
2379 | ||
2380 | /* Returns the bit number of the least significant set bit of a | |
2381 | number. If the input number has no bits set -1U is returned. */ | |
2382 | unsigned int | |
2383 | APInt::tcLSB(const integerPart *parts, unsigned int n) | |
2384 | { | |
2385 | unsigned int i, lsb; | |
2386 | ||
2387 | for (i = 0; i < n; i++) { | |
2388 | if (parts[i] != 0) { | |
2389 | lsb = partLSB(parts[i]); | |
2390 | ||
2391 | return lsb + i * integerPartWidth; | |
2392 | } | |
2393 | } | |
2394 | ||
2395 | return -1U; | |
2396 | } | |
2397 | ||
2398 | /* Returns the bit number of the most significant set bit of a number. | |
2399 | If the input number has no bits set -1U is returned. */ | |
2400 | unsigned int | |
2401 | APInt::tcMSB(const integerPart *parts, unsigned int n) | |
2402 | { | |
2403 | unsigned int msb; | |
2404 | ||
2405 | do { | |
2406 | --n; | |
2407 | ||
2408 | if (parts[n] != 0) { | |
2409 | msb = partMSB(parts[n]); | |
2410 | ||
2411 | return msb + n * integerPartWidth; | |
2412 | } | |
2413 | } while (n); | |
2414 | ||
2415 | return -1U; | |
2416 | } | |
2417 | ||
2418 | /* Copy the bit vector of width srcBITS from SRC, starting at bit | |
2419 | srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes | |
2420 | the least significant bit of DST. All high bits above srcBITS in | |
2421 | DST are zero-filled. */ | |
2422 | void | |
2423 | APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src, | |
2424 | unsigned int srcBits, unsigned int srcLSB) | |
2425 | { | |
2426 | unsigned int firstSrcPart, dstParts, shift, n; | |
2427 | ||
2428 | dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth; | |
2429 | assert(dstParts <= dstCount); | |
2430 | ||
2431 | firstSrcPart = srcLSB / integerPartWidth; | |
2432 | tcAssign (dst, src + firstSrcPart, dstParts); | |
2433 | ||
2434 | shift = srcLSB % integerPartWidth; | |
2435 | tcShiftRight (dst, dstParts, shift); | |
2436 | ||
2437 | /* We now have (dstParts * integerPartWidth - shift) bits from SRC | |
2438 | in DST. If this is less that srcBits, append the rest, else | |
2439 | clear the high bits. */ | |
2440 | n = dstParts * integerPartWidth - shift; | |
2441 | if (n < srcBits) { | |
2442 | integerPart mask = lowBitMask (srcBits - n); | |
2443 | dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) | |
2444 | << n % integerPartWidth); | |
2445 | } else if (n > srcBits) { | |
2446 | if (srcBits % integerPartWidth) | |
2447 | dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth); | |
2448 | } | |
2449 | ||
2450 | /* Clear high parts. */ | |
2451 | while (dstParts < dstCount) | |
2452 | dst[dstParts++] = 0; | |
2453 | } | |
2454 | ||
2455 | /* DST += RHS + C where C is zero or one. Returns the carry flag. */ | |
2456 | integerPart | |
2457 | APInt::tcAdd(integerPart *dst, const integerPart *rhs, | |
2458 | integerPart c, unsigned int parts) | |
2459 | { | |
2460 | unsigned int i; | |
2461 | ||
2462 | assert(c <= 1); | |
2463 | ||
2464 | for (i = 0; i < parts; i++) { | |
2465 | integerPart l; | |
2466 | ||
2467 | l = dst[i]; | |
2468 | if (c) { | |
2469 | dst[i] += rhs[i] + 1; | |
2470 | c = (dst[i] <= l); | |
2471 | } else { | |
2472 | dst[i] += rhs[i]; | |
2473 | c = (dst[i] < l); | |
2474 | } | |
2475 | } | |
2476 | ||
2477 | return c; | |
2478 | } | |
2479 | ||
2480 | /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ | |
2481 | integerPart | |
2482 | APInt::tcSubtract(integerPart *dst, const integerPart *rhs, | |
2483 | integerPart c, unsigned int parts) | |
2484 | { | |
2485 | unsigned int i; | |
2486 | ||
2487 | assert(c <= 1); | |
2488 | ||
2489 | for (i = 0; i < parts; i++) { | |
2490 | integerPart l; | |
2491 | ||
2492 | l = dst[i]; | |
2493 | if (c) { | |
2494 | dst[i] -= rhs[i] + 1; | |
2495 | c = (dst[i] >= l); | |
2496 | } else { | |
2497 | dst[i] -= rhs[i]; | |
2498 | c = (dst[i] > l); | |
2499 | } | |
2500 | } | |
2501 | ||
2502 | return c; | |
2503 | } | |
2504 | ||
2505 | /* Negate a bignum in-place. */ | |
2506 | void | |
2507 | APInt::tcNegate(integerPart *dst, unsigned int parts) | |
2508 | { | |
2509 | tcComplement(dst, parts); | |
2510 | tcIncrement(dst, parts); | |
2511 | } | |
2512 | ||
2513 | /* DST += SRC * MULTIPLIER + CARRY if add is true | |
2514 | DST = SRC * MULTIPLIER + CARRY if add is false | |
2515 | ||
2516 | Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC | |
2517 | they must start at the same point, i.e. DST == SRC. | |
2518 | ||
2519 | If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is | |
2520 | returned. Otherwise DST is filled with the least significant | |
2521 | DSTPARTS parts of the result, and if all of the omitted higher | |
2522 | parts were zero return zero, otherwise overflow occurred and | |
2523 | return one. */ | |
2524 | int | |
2525 | APInt::tcMultiplyPart(integerPart *dst, const integerPart *src, | |
2526 | integerPart multiplier, integerPart carry, | |
2527 | unsigned int srcParts, unsigned int dstParts, | |
2528 | bool add) | |
2529 | { | |
2530 | unsigned int i, n; | |
2531 | ||
2532 | /* Otherwise our writes of DST kill our later reads of SRC. */ | |
2533 | assert(dst <= src || dst >= src + srcParts); | |
2534 | assert(dstParts <= srcParts + 1); | |
2535 | ||
2536 | /* N loops; minimum of dstParts and srcParts. */ | |
2537 | n = dstParts < srcParts ? dstParts: srcParts; | |
2538 | ||
2539 | for (i = 0; i < n; i++) { | |
2540 | integerPart low, mid, high, srcPart; | |
2541 | ||
2542 | /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. | |
2543 | ||
2544 | This cannot overflow, because | |
2545 | ||
2546 | (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) | |
2547 | ||
2548 | which is less than n^2. */ | |
2549 | ||
2550 | srcPart = src[i]; | |
2551 | ||
2552 | if (multiplier == 0 || srcPart == 0) { | |
2553 | low = carry; | |
2554 | high = 0; | |
2555 | } else { | |
2556 | low = lowHalf(srcPart) * lowHalf(multiplier); | |
2557 | high = highHalf(srcPart) * highHalf(multiplier); | |
2558 | ||
2559 | mid = lowHalf(srcPart) * highHalf(multiplier); | |
2560 | high += highHalf(mid); | |
2561 | mid <<= integerPartWidth / 2; | |
2562 | if (low + mid < low) | |
2563 | high++; | |
2564 | low += mid; | |
2565 | ||
2566 | mid = highHalf(srcPart) * lowHalf(multiplier); | |
2567 | high += highHalf(mid); | |
2568 | mid <<= integerPartWidth / 2; | |
2569 | if (low + mid < low) | |
2570 | high++; | |
2571 | low += mid; | |
2572 | ||
2573 | /* Now add carry. */ | |
2574 | if (low + carry < low) | |
2575 | high++; | |
2576 | low += carry; | |
2577 | } | |
2578 | ||
2579 | if (add) { | |
2580 | /* And now DST[i], and store the new low part there. */ | |
2581 | if (low + dst[i] < low) | |
2582 | high++; | |
2583 | dst[i] += low; | |
2584 | } else | |
2585 | dst[i] = low; | |
2586 | ||
2587 | carry = high; | |
2588 | } | |
2589 | ||
2590 | if (i < dstParts) { | |
2591 | /* Full multiplication, there is no overflow. */ | |
2592 | assert(i + 1 == dstParts); | |
2593 | dst[i] = carry; | |
2594 | return 0; | |
2595 | } else { | |
2596 | /* We overflowed if there is carry. */ | |
2597 | if (carry) | |
2598 | return 1; | |
2599 | ||
2600 | /* We would overflow if any significant unwritten parts would be | |
2601 | non-zero. This is true if any remaining src parts are non-zero | |
2602 | and the multiplier is non-zero. */ | |
2603 | if (multiplier) | |
2604 | for (; i < srcParts; i++) | |
2605 | if (src[i]) | |
2606 | return 1; | |
2607 | ||
2608 | /* We fitted in the narrow destination. */ | |
2609 | return 0; | |
2610 | } | |
2611 | } | |
2612 | ||
2613 | /* DST = LHS * RHS, where DST has the same width as the operands and | |
2614 | is filled with the least significant parts of the result. Returns | |
2615 | one if overflow occurred, otherwise zero. DST must be disjoint | |
2616 | from both operands. */ | |
2617 | int | |
2618 | APInt::tcMultiply(integerPart *dst, const integerPart *lhs, | |
2619 | const integerPart *rhs, unsigned int parts) | |
2620 | { | |
2621 | unsigned int i; | |
2622 | int overflow; | |
2623 | ||
2624 | assert(dst != lhs && dst != rhs); | |
2625 | ||
2626 | overflow = 0; | |
2627 | tcSet(dst, 0, parts); | |
2628 | ||
2629 | for (i = 0; i < parts; i++) | |
2630 | overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, | |
2631 | parts - i, true); | |
2632 | ||
2633 | return overflow; | |
2634 | } | |
2635 | ||
2636 | /* DST = LHS * RHS, where DST has width the sum of the widths of the | |
2637 | operands. No overflow occurs. DST must be disjoint from both | |
2638 | operands. Returns the number of parts required to hold the | |
2639 | result. */ | |
2640 | unsigned int | |
2641 | APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs, | |
2642 | const integerPart *rhs, unsigned int lhsParts, | |
2643 | unsigned int rhsParts) | |
2644 | { | |
2645 | /* Put the narrower number on the LHS for less loops below. */ | |
2646 | if (lhsParts > rhsParts) { | |
2647 | return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); | |
2648 | } else { | |
2649 | unsigned int n; | |
2650 | ||
2651 | assert(dst != lhs && dst != rhs); | |
2652 | ||
2653 | tcSet(dst, 0, rhsParts); | |
2654 | ||
2655 | for (n = 0; n < lhsParts; n++) | |
2656 | tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true); | |
2657 | ||
2658 | n = lhsParts + rhsParts; | |
2659 | ||
2660 | return n - (dst[n - 1] == 0); | |
2661 | } | |
2662 | } | |
2663 | ||
2664 | /* If RHS is zero LHS and REMAINDER are left unchanged, return one. | |
2665 | Otherwise set LHS to LHS / RHS with the fractional part discarded, | |
2666 | set REMAINDER to the remainder, return zero. i.e. | |
2667 | ||
2668 | OLD_LHS = RHS * LHS + REMAINDER | |
2669 | ||
2670 | SCRATCH is a bignum of the same size as the operands and result for | |
2671 | use by the routine; its contents need not be initialized and are | |
2672 | destroyed. LHS, REMAINDER and SCRATCH must be distinct. | |
2673 | */ | |
2674 | int | |
2675 | APInt::tcDivide(integerPart *lhs, const integerPart *rhs, | |
2676 | integerPart *remainder, integerPart *srhs, | |
2677 | unsigned int parts) | |
2678 | { | |
2679 | unsigned int n, shiftCount; | |
2680 | integerPart mask; | |
2681 | ||
2682 | assert(lhs != remainder && lhs != srhs && remainder != srhs); | |
2683 | ||
2684 | shiftCount = tcMSB(rhs, parts) + 1; | |
2685 | if (shiftCount == 0) | |
2686 | return true; | |
2687 | ||
2688 | shiftCount = parts * integerPartWidth - shiftCount; | |
2689 | n = shiftCount / integerPartWidth; | |
2690 | mask = (integerPart) 1 << (shiftCount % integerPartWidth); | |
2691 | ||
2692 | tcAssign(srhs, rhs, parts); | |
2693 | tcShiftLeft(srhs, parts, shiftCount); | |
2694 | tcAssign(remainder, lhs, parts); | |
2695 | tcSet(lhs, 0, parts); | |
2696 | ||
2697 | /* Loop, subtracting SRHS if REMAINDER is greater and adding that to | |
2698 | the total. */ | |
2699 | for (;;) { | |
2700 | int compare; | |
2701 | ||
2702 | compare = tcCompare(remainder, srhs, parts); | |
2703 | if (compare >= 0) { | |
2704 | tcSubtract(remainder, srhs, 0, parts); | |
2705 | lhs[n] |= mask; | |
2706 | } | |
2707 | ||
2708 | if (shiftCount == 0) | |
2709 | break; | |
2710 | shiftCount--; | |
2711 | tcShiftRight(srhs, parts, 1); | |
2712 | if ((mask >>= 1) == 0) | |
2713 | mask = (integerPart) 1 << (integerPartWidth - 1), n--; | |
2714 | } | |
2715 | ||
2716 | return false; | |
2717 | } | |
2718 | ||
2719 | /* Shift a bignum left COUNT bits in-place. Shifted in bits are zero. | |
2720 | There are no restrictions on COUNT. */ | |
2721 | void | |
2722 | APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count) | |
2723 | { | |
2724 | if (count) { | |
2725 | unsigned int jump, shift; | |
2726 | ||
2727 | /* Jump is the inter-part jump; shift is is intra-part shift. */ | |
2728 | jump = count / integerPartWidth; | |
2729 | shift = count % integerPartWidth; | |
2730 | ||
2731 | while (parts > jump) { | |
2732 | integerPart part; | |
2733 | ||
2734 | parts--; | |
2735 | ||
2736 | /* dst[i] comes from the two parts src[i - jump] and, if we have | |
2737 | an intra-part shift, src[i - jump - 1]. */ | |
2738 | part = dst[parts - jump]; | |
2739 | if (shift) { | |
2740 | part <<= shift; | |
2741 | if (parts >= jump + 1) | |
2742 | part |= dst[parts - jump - 1] >> (integerPartWidth - shift); | |
2743 | } | |
2744 | ||
2745 | dst[parts] = part; | |
2746 | } | |
2747 | ||
2748 | while (parts > 0) | |
2749 | dst[--parts] = 0; | |
2750 | } | |
2751 | } | |
2752 | ||
2753 | /* Shift a bignum right COUNT bits in-place. Shifted in bits are | |
2754 | zero. There are no restrictions on COUNT. */ | |
2755 | void | |
2756 | APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count) | |
2757 | { | |
2758 | if (count) { | |
2759 | unsigned int i, jump, shift; | |
2760 | ||
2761 | /* Jump is the inter-part jump; shift is is intra-part shift. */ | |
2762 | jump = count / integerPartWidth; | |
2763 | shift = count % integerPartWidth; | |
2764 | ||
2765 | /* Perform the shift. This leaves the most significant COUNT bits | |
2766 | of the result at zero. */ | |
2767 | for (i = 0; i < parts; i++) { | |
2768 | integerPart part; | |
2769 | ||
2770 | if (i + jump >= parts) { | |
2771 | part = 0; | |
2772 | } else { | |
2773 | part = dst[i + jump]; | |
2774 | if (shift) { | |
2775 | part >>= shift; | |
2776 | if (i + jump + 1 < parts) | |
2777 | part |= dst[i + jump + 1] << (integerPartWidth - shift); | |
2778 | } | |
2779 | } | |
2780 | ||
2781 | dst[i] = part; | |
2782 | } | |
2783 | } | |
2784 | } | |
2785 | ||
2786 | /* Bitwise and of two bignums. */ | |
2787 | void | |
2788 | APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts) | |
2789 | { | |
2790 | unsigned int i; | |
2791 | ||
2792 | for (i = 0; i < parts; i++) | |
2793 | dst[i] &= rhs[i]; | |
2794 | } | |
2795 | ||
2796 | /* Bitwise inclusive or of two bignums. */ | |
2797 | void | |
2798 | APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts) | |
2799 | { | |
2800 | unsigned int i; | |
2801 | ||
2802 | for (i = 0; i < parts; i++) | |
2803 | dst[i] |= rhs[i]; | |
2804 | } | |
2805 | ||
2806 | /* Bitwise exclusive or of two bignums. */ | |
2807 | void | |
2808 | APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts) | |
2809 | { | |
2810 | unsigned int i; | |
2811 | ||
2812 | for (i = 0; i < parts; i++) | |
2813 | dst[i] ^= rhs[i]; | |
2814 | } | |
2815 | ||
2816 | /* Complement a bignum in-place. */ | |
2817 | void | |
2818 | APInt::tcComplement(integerPart *dst, unsigned int parts) | |
2819 | { | |
2820 | unsigned int i; | |
2821 | ||
2822 | for (i = 0; i < parts; i++) | |
2823 | dst[i] = ~dst[i]; | |
2824 | } | |
2825 | ||
2826 | /* Comparison (unsigned) of two bignums. */ | |
2827 | int | |
2828 | APInt::tcCompare(const integerPart *lhs, const integerPart *rhs, | |
2829 | unsigned int parts) | |
2830 | { | |
2831 | while (parts) { | |
2832 | parts--; | |
2833 | if (lhs[parts] == rhs[parts]) | |
2834 | continue; | |
2835 | ||
2836 | if (lhs[parts] > rhs[parts]) | |
2837 | return 1; | |
2838 | else | |
2839 | return -1; | |
2840 | } | |
2841 | ||
2842 | return 0; | |
2843 | } | |
2844 | ||
2845 | /* Increment a bignum in-place, return the carry flag. */ | |
2846 | integerPart | |
2847 | APInt::tcIncrement(integerPart *dst, unsigned int parts) | |
2848 | { | |
2849 | unsigned int i; | |
2850 | ||
2851 | for (i = 0; i < parts; i++) | |
2852 | if (++dst[i] != 0) | |
2853 | break; | |
2854 | ||
2855 | return i == parts; | |
2856 | } | |
2857 | ||
1a4d82fc JJ |
2858 | /* Decrement a bignum in-place, return the borrow flag. */ |
2859 | integerPart | |
2860 | APInt::tcDecrement(integerPart *dst, unsigned int parts) { | |
2861 | for (unsigned int i = 0; i < parts; i++) { | |
2862 | // If the current word is non-zero, then the decrement has no effect on the | |
2863 | // higher-order words of the integer and no borrow can occur. Exit early. | |
2864 | if (dst[i]--) | |
2865 | return 0; | |
2866 | } | |
2867 | // If every word was zero, then there is a borrow. | |
2868 | return 1; | |
2869 | } | |
2870 | ||
2871 | ||
223e47cc LB |
2872 | /* Set the least significant BITS bits of a bignum, clear the |
2873 | rest. */ | |
2874 | void | |
2875 | APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts, | |
2876 | unsigned int bits) | |
2877 | { | |
2878 | unsigned int i; | |
2879 | ||
2880 | i = 0; | |
2881 | while (bits > integerPartWidth) { | |
2882 | dst[i++] = ~(integerPart) 0; | |
2883 | bits -= integerPartWidth; | |
2884 | } | |
2885 | ||
2886 | if (bits) | |
2887 | dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits); | |
2888 | ||
2889 | while (i < parts) | |
2890 | dst[i++] = 0; | |
2891 | } |