/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/bswap.h"
#include "qemu/int128.h"

#ifdef CONFIG_INT128
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96-bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
{
    return ((__int128_t)a * b + c - 1) / c;
}

static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
                               uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}

static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
                              int64_t divisor)
{
    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
    __int128_t result = dividend / divisor;

    *plow = result;
    *phigh = result >> 64;
    return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c,
                                         bool round_up)
{
    union {
        uint64_t ll;
        struct {
#if HOST_BIG_ENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    if (round_up) {
        rl += c - 1;
    }
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return muldiv64_rounding(a, b, c, false);
}

static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
{
    return muldiv64_rounding(a, b, c, true);
}
#endif

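/*
 * Example (editor's illustrative sketch, not part of the original header):
 * mulu64() and divu128() operate on a 128-bit value split into a low/high
 * pair of uint64_t.  A caller might combine them like this, assuming some
 * uint64_t variables a, b and a non-zero divisor d:
 *
 *     uint64_t lo, hi, rem;
 *     mulu64(&lo, &hi, a, b);        // hi:lo = full 128-bit product a * b
 *     rem = divu128(&lo, &hi, d);    // hi:lo = quotient, rem = remainder
 */
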
/**
 * clz8 - count leading zeros in an 8-bit value.
 * @val: The value to search
 *
 * Returns 8 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 *
 * Note that the GCC builtin will upcast its argument to an `unsigned int`
 * so this function subtracts off the number of prepended zeroes.
 */
static inline int clz8(uint8_t val)
{
    return val ? __builtin_clz(val) - 24 : 8;
}

/**
 * clz16 - count leading zeros in a 16-bit value.
 * @val: The value to search
 *
 * Returns 16 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 *
 * Note that the GCC builtin will upcast its argument to an `unsigned int`
 * so this function subtracts off the number of prepended zeroes.
 */
static inline int clz16(uint16_t val)
{
    return val ? __builtin_clz(val) - 16 : 16;
}

/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz8 - count trailing zeros in an 8-bit value.
 * @val: The value to search
 *
 * Returns 8 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz8(uint8_t val)
{
    return val ? __builtin_ctz(val) : 8;
}

/**
 * ctz16 - count trailing zeros in a 16-bit value.
 * @val: The value to search
 *
 * Returns 16 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz16(uint16_t val)
{
    return val ? __builtin_ctz(val) : 16;
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero. Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}

/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}

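/*
 * Worked examples for clrsb (editor's note, not in the original header):
 * clrsb32(0) == 31 and clrsb32(-1) == 31 (every following bit matches the
 * sign bit), clrsb32(1) == 30 (thirty copies of the 0 sign bit before the
 * final 1), clrsb32(0x7fffffff) == 0 (bit 30 already differs from the sign).
 */
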
/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}

/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position. */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position. */
    x = bswap16(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position. */
    x = bswap32(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position. */
    x = bswap64(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}

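/*
 * Example values (editor's note, not in the original header):
 * revbit8(0x01) == 0x80 and revbit8(0x0f) == 0xf0; the wider variants
 * behave the same way across 16, 32 and 64 bits.
 */
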
/**
 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value
 */
static inline uint64_t uabs64(int64_t v)
{
    return v < 0 ? -v : v;
}

/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_add_overflow(x, y, ret);
}

/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_sub_overflow(x, y, ret);
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
    return __builtin_mul_overflow(x, y, ret);
}

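/*
 * Usage sketch (editor's note, not in the original header): the overflow
 * helpers mirror __builtin_*_overflow(), so a caller typically branches on
 * the return value, e.g. with hypothetical uint32_t variables a and b:
 *
 *     uint32_t sum;
 *     if (uadd32_overflow(a, b, &sum)) {
 *         // sum wrapped around; handle the overflow
 *     }
 */
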
/*
 * Unsigned 128x64 multiplication.
 * Returns true if the result got truncated to 128 bits.
 * Otherwise, returns false and stores the full product via plow and phigh.
 */
static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor)
{
#if defined(CONFIG_INT128)
    bool res;
    __uint128_t r;
    __uint128_t f = ((__uint128_t)*phigh << 64) | *plow;
    res = __builtin_mul_overflow(f, factor, &r);

    *plow = r;
    *phigh = r >> 64;

    return res;
#else
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    uint64_t ahi;
    uint64_t blo, bhi;

    if (dhi == 0) {
        mulu64(plow, phigh, dlo, factor);
        return false;
    }

    mulu64(plow, &ahi, dlo, factor);
    mulu64(&blo, &bhi, dhi, factor);

    return uadd64_overflow(ahi, blo, phigh) || bhi != 0;
#endif
}

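/*
 * Example (editor's illustrative sketch, not part of the original header):
 * scaling a 128-bit quantity held in a high/low pair in place, assuming
 * hypothetical uint64_t variables lo, hi and scale:
 *
 *     if (mulu128(&lo, &hi, scale)) {
 *         // product no longer fits in 128 bits
 *     }
 */
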
/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: minuend
 * @y: subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll) && !defined(BUILTIN_SUBCLL_BROKEN)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}

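/*
 * Example (editor's illustrative sketch, not part of the original header):
 * uadd64_carry()/usub64_borrow() chain naturally for multi-word arithmetic.
 * A 128-bit addition of two values held as high/low uint64_t pairs might
 * look like this (all variable names are hypothetical):
 *
 *     bool carry = false;
 *     r_lo = uadd64_carry(a_lo, b_lo, &carry);
 *     r_hi = uadd64_carry(a_hi, b_hi, &carry);
 *     // carry now holds the carry-out of the 128-bit sum
 */
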
/* Host type specific sizes of these routines. */

#if ULONG_MAX == UINT32_MAX
# define clzl clz32
# define ctzl ctz32
# define clol clo32
# define ctol cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl clz64
# define ctzl ctz64
# define clol clo64
# define ctol cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64 */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}

static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

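/*
 * Example values (editor's note, not in the original header):
 * pow2floor(5) == 4, pow2ceil(5) == 8, pow2ceil(8) == 8, pow2ceil(0) == 1.
 * pow2roundup32() as written returns the smallest power of two strictly
 * greater than its argument, e.g. pow2roundup32(8) == 16, and wraps to 0
 * once bit 31 of the argument is set.
 */
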
/**
 * urshift - 128-bit Unsigned Right Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range are
 * taken modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit Unsigned Left Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range are
 * taken modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

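/*
 * Usage sketch (editor's note, not in the original header), shifting a
 * 128-bit value held as a low/high pair of hypothetical variables lo/hi:
 *
 *     bool overflow = false;
 *     ulshift(&lo, &hi, 8, &overflow);   // hi:lo <<= 8, overflow set if bits were lost
 *     urshift(&lo, &hi, 8);              // hi:lo >>= 8 (logical shift)
 */
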
/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 */
static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
                                  uint64_t n0, uint64_t d)
{
#if defined(__x86_64__)
    uint64_t q;
    asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
    return q;
#elif defined(__s390x__) && !defined(__clang__)
    /* Need to use a TImode type to get an even register pair for DLGR. */
    unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
    asm("dlgr %0, %1" : "+r"(n) : "r"(d));
    *r = n >> 64;
    return n;
#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
    /* From Power ISA 2.06, programming note for divdeu. */
    uint64_t q1, q2, Q, r1, r2, R;
    asm("divdeu %0,%2,%4; divdu %1,%3,%4"
        : "=&r"(q1), "=r"(q2)
        : "r"(n1), "r"(n0), "r"(d));
    r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
    r2 = n0 - (q2 * d);
    Q = q1 + q2;
    R = r1 + r2;
    if (R >= d || R < r2) { /* overflow implies R > d */
        Q += 1;
        R -= d;
    }
    *r = R;
    return Q;
#else
    uint64_t d0, d1, q0, q1, r1, r0, m;

    d0 = (uint32_t)d;
    d1 = d >> 32;

    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    *r = r0;
    return (q1 << 32) | q0;
#endif
}

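/*
 * Note and usage sketch (editor's addition, not in the original header):
 * as in GMP's __udiv_qrnnd, the two-word dividend n1:n0 is divided by d,
 * with the quotient returned and the remainder stored through r.  The
 * caller is expected to ensure n1 < d so the quotient fits in 64 bits:
 *
 *     uint64_t rem;
 *     uint64_t q = udiv_qrnnd(&rem, n_hi, n_lo, d);   // requires n_hi < d
 */
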
/*
 * Like divu128()/divs128() above, but for a 256-bit dividend split across
 * two Int128 halves: the quotient is stored back into *plow/*phigh and the
 * remainder is returned.
 */
Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor);
Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor);
#endif