/*
 * Utility compute operations used by translated code.
 *
 * Copyright (c) 2007 Thiemo Seufer
 * Copyright (c) 2007 Jocelyn Mayer
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef HOST_UTILS_H
#define HOST_UTILS_H

#include "qemu/compiler.h"
#include "qemu/bswap.h"

#ifdef CONFIG_INT128
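/**
 * mulu64 - unsigned 64x64 -> 128-bit multiplication
 * @plow: Output for the low 64 bits of the product
 * @phigh: Output for the high 64 bits of the product
 * @a, @b: Unsigned 64-bit factors
 */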
static inline void mulu64(uint64_t *plow, uint64_t *phigh,
                          uint64_t a, uint64_t b)
{
    __uint128_t r = (__uint128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

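/**
 * muls64 - signed 64x64 -> 128-bit multiplication
 * @plow: Output for the low 64 bits of the product
 * @phigh: Output for the high 64 bits of the product
 * @a, @b: Signed 64-bit factors
 */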
static inline void muls64(uint64_t *plow, uint64_t *phigh,
                          int64_t a, int64_t b)
{
    __int128_t r = (__int128_t)a * b;
    *plow = r;
    *phigh = r >> 64;
}

/* compute with 96 bit intermediate result: (a*b)/c */
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (__int128_t)a * b / c;
}

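/**
 * divu128 - unsigned 128-by-64 bit division
 * @plow: In: low 64 bits of the dividend; out: 64-bit quotient (truncated)
 * @phigh: In: high 64 bits of the dividend; out: 64-bit remainder
 * @divisor: Unsigned 64-bit divisor
 *
 * Returns non-zero if @divisor is zero (leaving the outputs untouched)
 * or if the quotient does not fit in 64 bits, zero otherwise.
 */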
static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    if (divisor == 0) {
        return 1;
    } else {
        __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
        __uint128_t result = dividend / divisor;
        *plow = result;
        *phigh = dividend % divisor;
        return result > UINT64_MAX;
    }
}

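/**
 * divs128 - signed 128-by-64 bit division
 * @plow: In: low 64 bits of the dividend; out: 64-bit quotient (truncated)
 * @phigh: In: high 64 bits of the dividend; out: 64-bit remainder
 * @divisor: Signed 64-bit divisor
 *
 * Returns non-zero if @divisor is zero (leaving the outputs untouched)
 * or if the quotient does not fit in a signed 64-bit integer.
 */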
static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
{
    if (divisor == 0) {
        return 1;
    } else {
        __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
        __int128_t result = dividend / divisor;
        *plow = result;
        *phigh = dividend % divisor;
        return result != *plow;
    }
}
#else
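/*
 * Without __int128 support, mulu64/muls64/divu128/divs128 are
 * implemented out of line rather than as the inline wrappers above.
 */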
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);

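/*
 * compute with 96 bit intermediate result: (a*b)/c
 *
 * Without a 128-bit type, split @a into 32-bit halves, multiply each
 * half by @b, divide the high partial product by @c, and fold its
 * remainder into the division of the low partial product.
 */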
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    union {
        uint64_t ll;
        struct {
#ifdef HOST_WORDS_BIGENDIAN
            uint32_t high, low;
#else
            uint32_t low, high;
#endif
        } l;
    } u, res;
    uint64_t rl, rh;

    u.ll = a;
    rl = (uint64_t)u.l.low * (uint64_t)b;
    rh = (uint64_t)u.l.high * (uint64_t)b;
    rh += (rl >> 32);
    res.l.high = rh / c;
    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
    return res.ll;
}
#endif

/**
 * clz32 - count leading zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz32(uint32_t val)
{
    return val ? __builtin_clz(val) : 32;
}

/**
 * clo32 - count leading ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int clo32(uint32_t val)
{
    return clz32(~val);
}

/**
 * clz64 - count leading zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int clz64(uint64_t val)
{
    return val ? __builtin_clzll(val) : 64;
}

/**
 * clo64 - count leading ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int clo64(uint64_t val)
{
    return clz64(~val);
}

/**
 * ctz32 - count trailing zeros in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz32(uint32_t val)
{
    return val ? __builtin_ctz(val) : 32;
}

/**
 * cto32 - count trailing ones in a 32-bit value.
 * @val: The value to search
 *
 * Returns 32 if the value is -1.
 */
static inline int cto32(uint32_t val)
{
    return ctz32(~val);
}

/**
 * ctz64 - count trailing zeros in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is zero.  Note that the GCC builtin is
 * undefined if the value is zero.
 */
static inline int ctz64(uint64_t val)
{
    return val ? __builtin_ctzll(val) : 64;
}

/**
 * cto64 - count trailing ones in a 64-bit value.
 * @val: The value to search
 *
 * Returns 64 if the value is -1.
 */
static inline int cto64(uint64_t val)
{
    return ctz64(~val);
}

/**
 * clrsb32 - count leading redundant sign bits in a 32-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-31].
 */
static inline int clrsb32(uint32_t val)
{
#if __has_builtin(__builtin_clrsb) || !defined(__clang__)
    return __builtin_clrsb(val);
#else
    return clz32(val ^ ((int32_t)val >> 1)) - 1;
#endif
}

/**
 * clrsb64 - count leading redundant sign bits in a 64-bit value.
 * @val: The value to search
 *
 * Returns the number of bits following the sign bit that are equal to it.
 * No special cases; output range is [0-63].
 */
static inline int clrsb64(uint64_t val)
{
#if __has_builtin(__builtin_clrsbll) || !defined(__clang__)
    return __builtin_clrsbll(val);
#else
    return clz64(val ^ ((int64_t)val >> 1)) - 1;
#endif
}

/**
 * ctpop8 - count the population of one bits in an 8-bit value.
 * @val: The value to search
 */
static inline int ctpop8(uint8_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop16 - count the population of one bits in a 16-bit value.
 * @val: The value to search
 */
static inline int ctpop16(uint16_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop32 - count the population of one bits in a 32-bit value.
 * @val: The value to search
 */
static inline int ctpop32(uint32_t val)
{
    return __builtin_popcount(val);
}

/**
 * ctpop64 - count the population of one bits in a 64-bit value.
 * @val: The value to search
 */
static inline int ctpop64(uint64_t val)
{
    return __builtin_popcountll(val);
}

/**
 * revbit8 - reverse the bits in an 8-bit value.
 * @x: The value to modify.
 */
static inline uint8_t revbit8(uint8_t x)
{
#if __has_builtin(__builtin_bitreverse8)
    return __builtin_bitreverse8(x);
#else
    /* Assign the correct nibble position. */
    x = ((x & 0xf0) >> 4)
      | ((x & 0x0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88) >> 3)
      | ((x & 0x44) >> 1)
      | ((x & 0x22) << 1)
      | ((x & 0x11) << 3);
    return x;
#endif
}

/**
 * revbit16 - reverse the bits in a 16-bit value.
 * @x: The value to modify.
 */
static inline uint16_t revbit16(uint16_t x)
{
#if __has_builtin(__builtin_bitreverse16)
    return __builtin_bitreverse16(x);
#else
    /* Assign the correct byte position. */
    x = bswap16(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0) >> 4)
      | ((x & 0x0f0f) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888) >> 3)
      | ((x & 0x4444) >> 1)
      | ((x & 0x2222) << 1)
      | ((x & 0x1111) << 3);
    return x;
#endif
}

/**
 * revbit32 - reverse the bits in a 32-bit value.
 * @x: The value to modify.
 */
static inline uint32_t revbit32(uint32_t x)
{
#if __has_builtin(__builtin_bitreverse32)
    return __builtin_bitreverse32(x);
#else
    /* Assign the correct byte position. */
    x = bswap32(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0u) >> 4)
      | ((x & 0x0f0f0f0fu) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x88888888u) >> 3)
      | ((x & 0x44444444u) >> 1)
      | ((x & 0x22222222u) << 1)
      | ((x & 0x11111111u) << 3);
    return x;
#endif
}

/**
 * revbit64 - reverse the bits in a 64-bit value.
 * @x: The value to modify.
 */
static inline uint64_t revbit64(uint64_t x)
{
#if __has_builtin(__builtin_bitreverse64)
    return __builtin_bitreverse64(x);
#else
    /* Assign the correct byte position. */
    x = bswap64(x);
    /* Assign the correct nibble position. */
    x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4)
      | ((x & 0x0f0f0f0f0f0f0f0full) << 4);
    /* Assign the correct bit position. */
    x = ((x & 0x8888888888888888ull) >> 3)
      | ((x & 0x4444444444444444ull) >> 1)
      | ((x & 0x2222222222222222ull) << 1)
      | ((x & 0x1111111111111111ull) << 3);
    return x;
#endif
}

/**
 * sadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * sadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return ((*ret ^ x) & ~(x ^ y)) < 0;
#endif
}

/**
 * uadd32_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

/**
 * uadd64_overflow - addition with overflow indication
 * @x, @y: addends
 * @ret: Output for sum
 *
 * Computes *@ret = @x + @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5
    return __builtin_add_overflow(x, y, ret);
#else
    *ret = x + y;
    return *ret < x;
#endif
}

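/*
 * Illustrative sketch (not part of this header): clamping an unsigned
 * 32-bit sum at UINT32_MAX using uadd32_overflow(), with hypothetical
 * inputs a and b:
 *
 *     uint32_t sum;
 *     if (uadd32_overflow(a, b, &sum)) {
 *         sum = UINT32_MAX;
 *     }
 */
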
/**
 * ssub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * ssub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return ((*ret ^ x) & (x ^ y)) < 0;
#endif
}

/**
 * usub32_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * usub64_overflow - subtraction with overflow indication
 * @x: Minuend
 * @y: Subtrahend
 * @ret: Output for difference
 *
 * Computes *@ret = @x - @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5
    return __builtin_sub_overflow(x, y, ret);
#else
    *ret = x - y;
    return x < y;
#endif
}

/**
 * smul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    int64_t z = (int64_t)x * y;
    *ret = z;
    return *ret != z;
#endif
}

/**
 * smul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi, lo;
    muls64(&lo, &hi, x, y);
    *ret = lo;
    return hi != ((int64_t)lo >> 63);
#endif
}

/**
 * umul32_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t z = (uint64_t)x * y;
    *ret = z;
    return z > UINT32_MAX;
#endif
}

/**
 * umul64_overflow - multiplication with overflow indication
 * @x, @y: Input multipliers
 * @ret: Output for product
 *
 * Computes *@ret = @x * @y, and returns true if and only if that
 * value has been truncated.
 */
static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret)
{
#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5
    return __builtin_mul_overflow(x, y, ret);
#else
    uint64_t hi;
    mulu64(ret, &hi, x, y);
    return hi != 0;
#endif
}

/**
 * uadd64_carry - addition with carry-in and carry-out
 * @x, @y: addends
 * @pcarry: in-out carry value
 *
 * Computes @x + @y + *@pcarry, placing the carry-out back
 * into *@pcarry and returning the 64-bit sum.
 */
static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry)
{
#if __has_builtin(__builtin_addcll)
    unsigned long long c = *pcarry;
    x = __builtin_addcll(x, y, c, &c);
    *pcarry = c & 1;
    return x;
#else
    bool c = *pcarry;
    /* This is clang's internal expansion of __builtin_addc. */
    c = uadd64_overflow(x, c, &x);
    c |= uadd64_overflow(x, y, &x);
    *pcarry = c;
    return x;
#endif
}

/**
 * usub64_borrow - subtraction with borrow-in and borrow-out
 * @x: minuend
 * @y: subtrahend
 * @pborrow: in-out borrow value
 *
 * Computes @x - @y - *@pborrow, placing the borrow-out back
 * into *@pborrow and returning the 64-bit difference.
 */
static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow)
{
#if __has_builtin(__builtin_subcll)
    unsigned long long b = *pborrow;
    x = __builtin_subcll(x, y, b, &b);
    *pborrow = b & 1;
    return x;
#else
    bool b = *pborrow;
    b = usub64_overflow(x, b, &x);
    b |= usub64_overflow(x, y, &x);
    *pborrow = b;
    return x;
#endif
}

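/*
 * Illustrative sketch (not part of this header): a 128-bit addition
 * chained from two uadd64_carry() calls, with hypothetical operand
 * halves a_lo/a_hi and b_lo/b_hi:
 *
 *     bool carry = false;
 *     uint64_t r_lo = uadd64_carry(a_lo, b_lo, &carry);
 *     uint64_t r_hi = uadd64_carry(a_hi, b_hi, &carry);
 *
 * After the second call, carry holds the carry out of bit 127.
 */
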
/* Host type specific sizes of these routines. */

#if ULONG_MAX == UINT32_MAX
# define clzl clz32
# define ctzl ctz32
# define clol clo32
# define ctol cto32
# define ctpopl ctpop32
# define revbitl revbit32
#elif ULONG_MAX == UINT64_MAX
# define clzl clz64
# define ctzl ctz64
# define clol clo64
# define ctol cto64
# define ctpopl ctpop64
# define revbitl revbit64
#else
# error Unknown sizeof long
#endif

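/**
 * is_power_of_2 - test whether @value is a power of two
 *
 * Returns true if exactly one bit is set in @value, false otherwise
 * (in particular, false for zero).
 */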
static inline bool is_power_of_2(uint64_t value)
{
    if (!value) {
        return false;
    }

    return !(value & (value - 1));
}

/**
 * Return @value rounded down to the nearest power of two or zero.
 */
static inline uint64_t pow2floor(uint64_t value)
{
    if (!value) {
        /* Avoid undefined shift by 64 */
        return 0;
    }
    return 0x8000000000000000ull >> clz64(value);
}

/*
 * Return @value rounded up to the nearest power of two modulo 2^64.
 * This is *zero* for @value > 2^63, so be careful.
 */
static inline uint64_t pow2ceil(uint64_t value)
{
    int n = clz64(value - 1);

    if (!n) {
        /*
         * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63
         * Therefore, either @value == 0 or @value > 2^63.
         * If it's 0, return 1, else return 0.
         */
        return !value;
    }
    return 0x8000000000000000ull >> (n - 1);
}

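/*
 * Return the smallest power of two strictly greater than @x
 * (1 for @x == 0).  The result wraps to zero for @x >= 0x80000000,
 * so be careful.
 */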
static inline uint32_t pow2roundup32(uint32_t x)
{
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    return x + 1;
}

/**
 * urshift - 128-bit Unsigned Right Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range are
 * reduced modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);

/**
 * ulshift - 128-bit Unsigned Left Shift.
 * @plow: in/out - lower 64-bit integer.
 * @phigh: in/out - higher 64-bit integer.
 * @shift: in - bits to shift, between 0 and 127.
 * @overflow: out - true if any 1-bit is shifted out.
 *
 * Result is zero-extended and stored in plow/phigh, which are
 * input/output variables. Shift values outside the range are
 * reduced modulo 128. In other words, the caller is responsible
 * for verifying/asserting both the shift range and the plow/phigh
 * pointers.
 */
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

#endif