include/qemu/host-utils.h

   1 /*
   2  * Utility compute operations used by translated code.
   3  *
   4  * Copyright (c) 2007 Thiemo Seufer
   5  * Copyright (c) 2007 Jocelyn Mayer
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  */
  25
  26 /* Portions of this work are licensed under the terms of the GNU GPL,
  27  * version 2 or later. See the COPYING file in the top-level directory.
  28  */
  29
  30 #ifndef HOST_UTILS_H
  31 #define HOST_UTILS_H
  32
  33 #include "qemu/bswap.h"
  34 #include "qemu/int128.h"
  35
  36 #ifdef CONFIG_INT128
  37 static inline void mulu64(uint64_t *plow, uint64_t *phigh,
  38                           uint64_t a, uint64_t b)
  39 {
  40     __uint128_t r = (__uint128_t)a * b;
  41     *plow = r;
  42     *phigh = r >> 64;
  43 }
  44
  45 static inline void muls64(uint64_t *plow, uint64_t *phigh,
  46                           int64_t a, int64_t b)
  47 {
  48     __int128_t r = (__int128_t)a * b;
  49     *plow = r;
  50     *phigh = r >> 64;
  51 }
  52
  53 /* compute with 96 bit intermediate result: (a*b)/c */
  54 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  55 {
  56     return (__int128_t)a * b / c;
  57 }
  58
  59 static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
  60                                uint64_t divisor)
  61 {
  62     __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
  63     __uint128_t result = dividend / divisor;
  64
  65     *plow = result;
  66     *phigh = result >> 64;
  67     return dividend % divisor;
  68 }
  69
  70 static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
  71                               int64_t divisor)
  72 {
  73     __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
  74     __int128_t result = dividend / divisor;
  75
  76     *plow = result;
  77     *phigh = result >> 64;
  78     return dividend % divisor;
  79 }
  80 #else
  81 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
  82 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
  83 uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
  84 int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
  85
  86 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  87 {
  88     union {
  89         uint64_t ll;
  90         struct {
  91 #if HOST_BIG_ENDIAN
  92             uint32_t high, low;
  93 #else
  94             uint32_t low, high;
  95 #endif
  96         } l;
  97     } u, res;
  98     uint64_t rl, rh;
  99
 100     u.ll = a;
 101     rl = (uint64_t)u.l.low * (uint64_t)b;
 102     rh = (uint64_t)u.l.high * (uint64_t)b;
 103     rh += (rl >> 32);
 104     res.l.high = rh / c;
 105     res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
 106     return res.ll;
 107 }
 108 #endif
 109
 110 /**
 111  * clz8 - count leading zeros in a 8-bit value.
 112  * @val: The value to search
 113  *
 114  * Returns 8 if the value is zero.  Note that the GCC builtin is
 115  * undefined if the value is zero.
 116  *
 117  * Note that the GCC builtin will upcast its argument to an `unsigned int`
 118  * so this function subtracts off the number of prepended zeroes.
 119  */
 120 static inline int clz8(uint8_t val)
 121 {
 122     return val ? __builtin_clz(val) - 24 : 8;
 123 }
 124
 125 /**
 126  * clz16 - count leading zeros in a 16-bit value.
 127  * @val: The value to search
 128  *
 129  * Returns 16 if the value is zero.  Note that the GCC builtin is
 130  * undefined if the value is zero.
 131  *
 132  * Note that the GCC builtin will upcast its argument to an `unsigned int`
 133  * so this function subtracts off the number of prepended zeroes.
 134  */
 135 static inline int clz16(uint16_t val)
 136 {
 137     return val ? __builtin_clz(val) - 16 : 16;
 138 }
 139
 140 /**
 141  * clz32 - count leading zeros in a 32-bit value.
 142  * @val: The value to search
 143  *
 144  * Returns 32 if the value is zero.  Note that the GCC builtin is
 145  * undefined if the value is zero.
 146  */
 147 static inline int clz32(uint32_t val)
 148 {
 149     return val ? __builtin_clz(val) : 32;
 150 }
 151
 152 /**
 153  * clo32 - count leading ones in a 32-bit value.
 154  * @val: The value to search
 155  *
 156  * Returns 32 if the value is -1.
 157  */
 158 static inline int clo32(uint32_t val)
 159 {
 160     return clz32(~val);
 161 }
 162
 163 /**
 164  * clz64 - count leading zeros in a 64-bit value.
 165  * @val: The value to search
 166  *
 167  * Returns 64 if the value is zero.  Note that the GCC builtin is
 168  * undefined if the value is zero.
 169  */
 170 static inline int clz64(uint64_t val)
 171 {
 172     return val ? __builtin_clzll(val) : 64;
 173 }
 174
 175 /**
 176  * clo64 - count leading ones in a 64-bit value.
 177  * @val: The value to search
 178  *
 179  * Returns 64 if the value is -1.
 180  */
 181 static inline int clo64(uint64_t val)
 182 {
 183     return clz64(~val);
 184 }
 185
 186 /**
 187  * ctz8 - count trailing zeros in a 8-bit value.
 188  * @val: The value to search
 189  *
 190  * Returns 8 if the value is zero.  Note that the GCC builtin is
 191  * undefined if the value is zero.
 192  */
 193 static inline int ctz8(uint8_t val)
 194 {
 195     return val ? __builtin_ctz(val) : 8;
 196 }
 197
 198 /**
 199  * ctz16 - count trailing zeros in a 16-bit value.
 200  * @val: The value to search
 201  *
 202  * Returns 16 if the value is zero.  Note that the GCC builtin is
 203  * undefined if the value is zero.
 204  */
 205 static inline int ctz16(uint16_t val)
 206 {
 207     return val ? __builtin_ctz(val) : 16;
 208 }
 209
 210 /**
 211  * ctz32 - count trailing zeros in a 32-bit value.
 212  * @val: The value to search
 213  *
 214  * Returns 32 if the value is zero.  Note that the GCC builtin is
 215  * undefined if the value is zero.
 216  */
 217 static inline int ctz32(uint32_t val)
 218 {
 219     return val ? __builtin_ctz(val) : 32;
 220 }
 221
 222 /**
 223  * cto32 - count trailing ones in a 32-bit value.
 224  * @val: The value to search
 225  *
 226  * Returns 32 if the value is -1.
 227  */
 228 static inline int cto32(uint32_t val)
 229 {
 230     return ctz32(~val);
 231 }
 232
 233 /**
 234  * ctz64 - count trailing zeros in a 64-bit value.
 235  * @val: The value to search
 236  *
 237  * Returns 64 if the value is zero.  Note that the GCC builtin is
 238  * undefined if the value is zero.
 239  */
 240 static inline int ctz64(uint64_t val)
 241 {
 242     return val ? __builtin_ctzll(val) : 64;
 243 }
 244
 245 /**
 246  * cto64 - count trailing ones in a 64-bit value.
 247  * @val: The value to search
 248  *
 249  * Returns 64 if the value is -1.
 250  */
 251 static inline int cto64(uint64_t val)
 252 {
 253     return ctz64(~val);
 254 }
 255
 256 /**
 257  * clrsb32 - count leading redundant sign bits in a 32-bit value.
 258  * @val: The value to search
 259  *
 260  * Returns the number of bits following the sign bit that are equal to it.
 261  * No special cases; output range is [0-31].
 262  */
 263 static inline int clrsb32(uint32_t val)
 264 {
 265 #if __has_builtin(__builtin_clrsb) || !defined(__clang__)
 266     return __builtin_clrsb(val);
 267 #else
 268     return clz32(val ^ ((int32_t)val >> 1)) - 1;
 269 #endif
 270 }
 271
 272 /**
 273  * clrsb64 - count leading redundant sign bits in a 64-bit value.
 274  * @val: The value to search
 275  *
 276  * Returns the number of bits following the sign bit that are equal to it.
 277  * No special cases; output range is [0-63].
 278  */
 279 static inline int clrsb64(uint64_t val)
 280 {
 281 #if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
 282     return __builtin_clrsbll(val);
 283 #else
 284     return clz64(val ^ ((int64_t)val >> 1)) - 1;
 285 #endif
 286 }
 287
 288 /**
 289  * ctpop8 - count the population of one bits in an 8-bit value.
 290  * @val: The value to search
 291  */
 292 static inline int ctpop8(uint8_t val)
 293 {
 294     return __builtin_popcount(val);
 295 }
 296
 297 /**
 298  * ctpop16 - count the population of one bits in a 16-bit value.
 299  * @val: The value to search
 300  */
 301 static inline int ctpop16(uint16_t val)
 302 {
 303     return __builtin_popcount(val);
 304 }
 305
 306 /**
 307  * ctpop32 - count the population of one bits in a 32-bit value.
 308  * @val: The value to search
 309  */
 310 static inline int ctpop32(uint32_t val)
 311 {
 312     return __builtin_popcount(val);
 313 }
 314
 315 /**
 316  * ctpop64 - count the population of one bits in a 64-bit value.
 317  * @val: The value to search
 318  */
 319 static inline int ctpop64(uint64_t val)
 320 {
 321     return __builtin_popcountll(val);
 322 }
 323
 324 /**
 325  * revbit8 - reverse the bits in an 8-bit value.
 326  * @x: The value to modify.
 327  */
 328 static inline uint8_t revbit8(uint8_t x)
 329 {
 330 #if __has_builtin(__builtin_bitreverse8)
 331     return __builtin_bitreverse8(x);
 332 #else
 333     /* Assign the correct nibble position.  */
 334     x = ((x & 0xf0) >> 4)
 335       | ((x & 0x0f) << 4);
 336     /* Assign the correct bit position.  */
 337     x = ((x & 0x88) >> 3)
 338       | ((x & 0x44) >> 1)
 339       | ((x & 0x22) << 1)
 340       | ((x & 0x11) << 3);
 341     return x;
 342 #endif
 343 }
 344
 345 /**
 346  * revbit16 - reverse the bits in a 16-bit value.
 347  * @x: The value to modify.
 348  */
 349 static inline uint16_t revbit16(uint16_t x)
 350 {
 351 #if __has_builtin(__builtin_bitreverse16)
 352     return __builtin_bitreverse16(x);
 353 #else
 354     /* Assign the correct byte position.  */
 355     x = bswap16(x);
 356     /* Assign the correct nibble position.  */
 357     x = ((x & 0xf0f0) >> 4)
 358       | ((x & 0x0f0f) << 4);
 359     /* Assign the correct bit position.  */
 360     x = ((x & 0x8888) >> 3)
 361       | ((x & 0x4444) >> 1)
 362       | ((x & 0x2222) << 1)
 363       | ((x & 0x1111) << 3);
 364     return x;
 365 #endif
 366 }
 367
 368 /**
 369  * revbit32 - reverse the bits in a 32-bit value.
 370  * @x: The value to modify.
 371  */
 372 static inline uint32_t revbit32(uint32_t x)
 373 {
 374 #if __has_builtin(__builtin_bitreverse32)
 375     return __builtin_bitreverse32(x);
 376 #else
 377     /* Assign the correct byte position.  */
 378     x = bswap32(x);
 379     /* Assign the correct nibble position.  */
 380     x = ((x & 0xf0f0f0f0u) >> 4)
 381       | ((x & 0x0f0f0f0fu) << 4);
 382     /* Assign the correct bit position.  */
 383     x = ((x & 0x88888888u) >> 3)
 384       | ((x & 0x44444444u) >> 1)
 385       | ((x & 0x22222222u) << 1)
 386       | ((x & 0x11111111u) << 3);
 387     return x;
 388 #endif
 389 }
 390
 391 /**
 392  * revbit64 - reverse the bits in a 64-bit value.
 393  * @x: The value to modify.
 394  */
 395 static inline uint64_t revbit64(uint64_t x)
 396 {
 397 #if __has_builtin(__builtin_bitreverse64)
 398     return __builtin_bitreverse64(x);
 399 #else
 400     /* Assign the correct byte position.  */
 401     x = bswap64(x);
 402     /* Assign the correct nibble position.  */
 403     x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
 404       | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
 405     /* Assign the correct bit position.  */
 406     x = ((x & 0x8888888888888888ull) >> 3)
 407       | ((x & 0x4444444444444444ull) >> 1)
 408       | ((x & 0x2222222222222222ull) << 1)
 409       | ((x & 0x1111111111111111ull) << 3);
 410     return x;
 411 #endif
 412 }
 413
 414 /**
 415  * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 416  */
 417 static inline uint64_t uabs64(int64_t v)
 418 {
 419     return v < 0 ? -v : v;
 420 }
 421
 422 /**
 423  * sadd32_overflow - addition with overflow indication
 424  * @x, @y: addends
 425  * @ret: Output for sum
 426  *
 427  * Computes *@ret = @x + @y, and returns true if and only if that
 428  * value has been truncated.
 429  */
 430 static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
 431 {
 432     return __builtin_add_overflow(x, y, ret);
 433 }
 434
 435 /**
 436  * sadd64_overflow - addition with overflow indication
 437  * @x, @y: addends
 438  * @ret: Output for sum
 439  *
 440  * Computes *@ret = @x + @y, and returns true if and only if that
 441  * value has been truncated.
 442  */
 443 static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
 444 {
 445     return __builtin_add_overflow(x, y, ret);
 446 }
 447
 448 /**
 449  * uadd32_overflow - addition with overflow indication
 450  * @x, @y: addends
 451  * @ret: Output for sum
 452  *
 453  * Computes *@ret = @x + @y, and returns true if and only if that
 454  * value has been truncated.
 455  */
 456 static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 457 {
 458     return __builtin_add_overflow(x, y, ret);
 459 }
 460
 461 /**
 462  * uadd64_overflow - addition with overflow indication
 463  * @x, @y: addends
 464  * @ret: Output for sum
 465  *
 466  * Computes *@ret = @x + @y, and returns true if and only if that
 467  * value has been truncated.
 468  */
 469 static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 470 {
 471     return __builtin_add_overflow(x, y, ret);
 472 }
 473
 474 /**
 475  * ssub32_overflow - subtraction with overflow indication
 476  * @x: Minuend
 477  * @y: Subtrahend
 478  * @ret: Output for difference
 479  *
 480  * Computes *@ret = @x - @y, and returns true if and only if that
 481  * value has been truncated.
 482  */
 483 static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
 484 {
 485     return __builtin_sub_overflow(x, y, ret);
 486 }
 487
 488 /**
 489  * ssub64_overflow - subtraction with overflow indication
 490  * @x: Minuend
 491  * @y: Subtrahend
 492  * @ret: Output for sum
 493  *
 494  * Computes *@ret = @x - @y, and returns true if and only if that
 495  * value has been truncated.
 496  */
 497 static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
 498 {
 499     return __builtin_sub_overflow(x, y, ret);
 500 }
 501
 502 /**
 503  * usub32_overflow - subtraction with overflow indication
 504  * @x: Minuend
 505  * @y: Subtrahend
 506  * @ret: Output for sum
 507  *
 508  * Computes *@ret = @x - @y, and returns true if and only if that
 509  * value has been truncated.
 510  */
 511 static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 512 {
 513     return __builtin_sub_overflow(x, y, ret);
 514 }
 515
 516 /**
 517  * usub64_overflow - subtraction with overflow indication
 518  * @x: Minuend
 519  * @y: Subtrahend
 520  * @ret: Output for sum
 521  *
 522  * Computes *@ret = @x - @y, and returns true if and only if that
 523  * value has been truncated.
 524  */
 525 static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 526 {
 527     return __builtin_sub_overflow(x, y, ret);
 528 }
 529
 530 /**
 531  * smul32_overflow - multiplication with overflow indication
 532  * @x, @y: Input multipliers
 533  * @ret: Output for product
 534  *
 535  * Computes *@ret = @x * @y, and returns true if and only if that
 536  * value has been truncated.
 537  */
 538 static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
 539 {
 540     return __builtin_mul_overflow(x, y, ret);
 541 }
 542
 543 /**
 544  * smul64_overflow - multiplication with overflow indication
 545  * @x, @y: Input multipliers
 546  * @ret: Output for product
 547  *
 548  * Computes *@ret = @x * @y, and returns true if and only if that
 549  * value has been truncated.
 550  */
 551 static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
 552 {
 553     return __builtin_mul_overflow(x, y, ret);
 554 }
 555
 556 /**
 557  * umul32_overflow - multiplication with overflow indication
 558  * @x, @y: Input multipliers
 559  * @ret: Output for product
 560  *
 561  * Computes *@ret = @x * @y, and returns true if and only if that
 562  * value has been truncated.
 563  */
 564 static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
 565 {
 566     return __builtin_mul_overflow(x, y, ret);
 567 }
 568
 569 /**
 570  * umul64_overflow - multiplication with overflow indication
 571  * @x, @y: Input multipliers
 572  * @ret: Output for product
 573  *
 574  * Computes *@ret = @x * @y, and returns true if and only if that
 575  * value has been truncated.
 576  */
 577 static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
 578 {
 579     return __builtin_mul_overflow(x, y, ret);
 580 }
 581
 582 /*
 583  * Unsigned 128x64 multiplication.
 584  * Returns true if the result got truncated to 128 bits.
 585  * Otherwise, returns false and the multiplication result via plow and phigh.
 586  */
 587 static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
 588 {
 589 #if defined(CONFIG_INT128)
 590     bool res;
 591     __uint128_t r;
 592     __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
 593     res = __builtin_mul_overflow(f, factor, &r);
 594
 595     *plow = r;
 596     *phigh = r >> 64;
 597
 598     return res;
 599 #else
 600     uint64_t dhi = *phigh;
 601     uint64_t dlo = *plow;
 602     uint64_t ahi;
 603     uint64_t blo, bhi;
 604
 605     if (dhi == 0) {
 606         mulu64(plow, phigh, dlo, factor);
 607         return false;
 608     }
 609
 610     mulu64(plow, &ahi, dlo, factor);
 611     mulu64(&blo, &bhi, dhi, factor);
 612
 613     return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
 614 #endif
 615 }
 616
 617 /**
 618  * uadd64_carry - addition with carry-in and carry-out
 619  * @x, @y: addends
 620  * @pcarry: in-out carry value
 621  *
 622  * Computes @x + @y + *@pcarry, placing the carry-out back
 623  * into *@pcarry and returning the 64-bit sum.
 624  */
 625 static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
 626 {
 627 #if __has_builtin(__builtin_addcll)
 628     unsigned long long c = *pcarry;
 629     x = __builtin_addcll(x, y, c, &c);
 630     *pcarry = c & 1;
 631     return x;
 632 #else
 633     bool c = *pcarry;
 634     /* This is clang's internal expansion of __builtin_addc. */
 635     c = uadd64_overflow(x, c, &x);
 636     c |= uadd64_overflow(x, y, &x);
 637     *pcarry = c;
 638     return x;
 639 #endif
 640 }
 641
 642 /**
 643  * usub64_borrow - subtraction with borrow-in and borrow-out
 644  * @x, @y: addends
 645  * @pborrow: in-out borrow value
 646  *
 647  * Computes @x - @y - *@pborrow, placing the borrow-out back
 648  * into *@pborrow and returning the 64-bit sum.
 649  */
 650 static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
 651 {
 652 #if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN)
 653     unsigned long long b = *pborrow;
 654     x = __builtin_subcll(x, y, b, &b);
 655     *pborrow = b & 1;
 656     return x;
 657 #else
 658     bool b = *pborrow;
 659     b = usub64_overflow(x, b, &x);
 660     b |= usub64_overflow(x, y, &x);
 661     *pborrow = b;
 662     return x;
 663 #endif
 664 }
 665
 666 /* Host type specific sizes of these routines.  */
 667
 668 #if ULONG_MAX == UINT32_MAX
 669 # define clzl   clz32
 670 # define ctzl   ctz32
 671 # define clol   clo32
 672 # define ctol   cto32
 673 # define ctpopl ctpop32
 674 # define revbitl revbit32
 675 #elif ULONG_MAX == UINT64_MAX
 676 # define clzl   clz64
 677 # define ctzl   ctz64
 678 # define clol   clo64
 679 # define ctol   cto64
 680 # define ctpopl ctpop64
 681 # define revbitl revbit64
 682 #else
 683 # error Unknown sizeof long
 684 #endif
 685
 686 static inline bool is_power_of_2(uint64_t value)
 687 {
 688     if (!value) {
 689         return false;
 690     }
 691
 692     return !(value & (value - 1));
 693 }
 694
 695 /**
 696  * Return @value rounded down to the nearest power of two or zero.
 697  */
 698 static inline uint64_t pow2floor(uint64_t value)
 699 {
 700     if (!value) {
 701         /* Avoid undefined shift by 64 */
 702         return 0;
 703     }
 704     return 0x8000000000000000ull >> clz64(value);
 705 }
 706
 707 /*
 708  * Return @value rounded up to the nearest power of two modulo 2^64.
 709  * This is *zero* for @value > 2^63, so be careful.
 710  */
 711 static inline uint64_t pow2ceil(uint64_t value)
 712 {
 713     int n = clz64(value - 1);
 714
 715     if (!n) {
 716         /*
 717          * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
 718          * Therefore, either @value == 0 or @value > 2^63.
 719          * If it's 0, return 1, else return 0.
 720          */
 721         return !value;
 722     }
 723     return 0x8000000000000000ull >> (n - 1);
 724 }
 725
 726 static inline uint32_t pow2roundup32(uint32_t x)
 727 {
 728     x |= (x >> 1);
 729     x |= (x >> 2);
 730     x |= (x >> 4);
 731     x |= (x >> 8);
 732     x |= (x >> 16);
 733     return x + 1;
 734 }
 735
 736 /**
 737  * urshift - 128-bit Unsigned Right Shift.
 738  * @plow: in/out - lower 64-bit integer.
 739  * @phigh: in/out - higher 64-bit integer.
 740  * @shift: in - bytes to shift, between 0 and 127.
 741  *
 742  * Result is zero-extended and stored in plow/phigh, which are
 743  * input/output variables. Shift values outside the range will
 744  * be mod to 128. In other words, the caller is responsible to
 745  * verify/assert both the shift range and plow/phigh pointers.
 746  */
 747 void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
 748
 749 /**
 750  * ulshift - 128-bit Unsigned Left Shift.
 751  * @plow: in/out - lower 64-bit integer.
 752  * @phigh: in/out - higher 64-bit integer.
 753  * @shift: in - bytes to shift, between 0 and 127.
 754  * @overflow: out - true if any 1-bit is shifted out.
 755  *
 756  * Result is zero-extended and stored in plow/phigh, which are
 757  * input/output variables. Shift values outside the range will
 758  * be mod to 128. In other words, the caller is responsible to
 759  * verify/assert both the shift range and plow/phigh pointers.
 760  */
 761 void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
 762
 763 /* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 764  * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 765  *
 766  * Licensed under the GPLv2/LGPLv3
 767  */
 768 static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
 769                                   uint64_t n0, uint64_t d)
 770 {
 771 #if defined(__x86_64__)
 772     uint64_t q;
 773     asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
 774     return q;
 775 #elif defined(__s390x__) && !defined(__clang__)
 776     /* Need to use a TImode type to get an even register pair for DLGR.  */
 777     unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
 778     asm("dlgr %0, %1" : "+r"(n) : "r"(d));
 779     *r = n >> 64;
 780     return n;
 781 #elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
 782     /* From Power ISA 2.06, programming note for divdeu.  */
 783     uint64_t q1, q2, Q, r1, r2, R;
 784     asm("divdeu %0,%2,%4; divdu %1,%3,%4"
 785         : "=&r"(q1), "=r"(q2)
 786         : "r"(n1), "r"(n0), "r"(d));
 787     r1 = -(q1 * d);         /* low part of (n1<<64) - (q1 * d) */
 788     r2 = n0 - (q2 * d);
 789     Q = q1 + q2;
 790     R = r1 + r2;
 791     if (R >= d || R < r2) { /* overflow implies R > d */
 792         Q += 1;
 793         R -= d;
 794     }
 795     *r = R;
 796     return Q;
 797 #else
 798     uint64_t d0, d1, q0, q1, r1, r0, m;
 799
 800     d0 = (uint32_t)d;
 801     d1 = d >> 32;
 802
 803     r1 = n1 % d1;
 804     q1 = n1 / d1;
 805     m = q1 * d0;
 806     r1 = (r1 << 32) | (n0 >> 32);
 807     if (r1 < m) {
 808         q1 -= 1;
 809         r1 += d;
 810         if (r1 >= d) {
 811             if (r1 < m) {
 812                 q1 -= 1;
 813                 r1 += d;
 814             }
 815         }
 816     }
 817     r1 -= m;
 818
 819     r0 = r1 % d1;
 820     q0 = r1 / d1;
 821     m = q0 * d0;
 822     r0 = (r0 << 32) | (uint32_t)n0;
 823     if (r0 < m) {
 824         q0 -= 1;
 825         r0 += d;
 826         if (r0 >= d) {
 827             if (r0 < m) {
 828                 q0 -= 1;
 829                 r0 += d;
 830             }
 831         }
 832     }
 833     r0 -= m;
 834
 835     *r = r0;
 836     return (q1 << 32) | q0;
 837 #endif
 838 }
 839
 840 Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
 841 Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
 842 #endif