]> git.proxmox.com Git - mirror_qemu.git/blame - include/fpu/softfloat-macros.h
hw/s390x/3270-ccw: avoid taking address of fields in packed struct
[mirror_qemu.git] / include / fpu / softfloat-macros.h
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support macros
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
158142c2 20This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
a7d1ac78 21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
158142c2
FB
82/*----------------------------------------------------------------------------
83| Shifts `a' right by the number of bits given in `count'. If any nonzero
84| bits are shifted off, they are ``jammed'' into the least significant bit of
85| the result by setting the least significant bit to 1. The value of `count'
86| can be arbitrarily large; in particular, if `count' is greater than 32, the
87| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
88| The result is stored in the location pointed to by `zPtr'.
89*----------------------------------------------------------------------------*/
90
07d792d2 91static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
158142c2 92{
bb98fe42 93 uint32_t z;
158142c2
FB
94
95 if ( count == 0 ) {
96 z = a;
97 }
98 else if ( count < 32 ) {
99 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
100 }
101 else {
102 z = ( a != 0 );
103 }
104 *zPtr = z;
105
106}
107
108/*----------------------------------------------------------------------------
109| Shifts `a' right by the number of bits given in `count'. If any nonzero
110| bits are shifted off, they are ``jammed'' into the least significant bit of
111| the result by setting the least significant bit to 1. The value of `count'
112| can be arbitrarily large; in particular, if `count' is greater than 64, the
113| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
114| The result is stored in the location pointed to by `zPtr'.
115*----------------------------------------------------------------------------*/
116
07d792d2 117static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
158142c2 118{
bb98fe42 119 uint64_t z;
158142c2
FB
120
121 if ( count == 0 ) {
122 z = a;
123 }
124 else if ( count < 64 ) {
125 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
126 }
127 else {
128 z = ( a != 0 );
129 }
130 *zPtr = z;
131
132}
133
134/*----------------------------------------------------------------------------
135| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
136| _plus_ the number of bits given in `count'. The shifted result is at most
137| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
138| bits shifted off form a second 64-bit result as follows: The _last_ bit
139| shifted off is the most-significant bit of the extra result, and the other
140| 63 bits of the extra result are all zero if and only if _all_but_the_last_
141| bits shifted off were all zero. This extra result is stored in the location
142| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
a7d1ac78
PM
143| (This routine makes more sense if `a0' and `a1' are considered to form a
144| fixed-point value with binary point between `a0' and `a1'. This fixed-point
145| value is shifted right by the number of bits given in `count', and the
146| integer part of the result is returned at the location pointed to by
158142c2
FB
147| `z0Ptr'. The fractional part of the result may be slightly corrupted as
148| described above, and is returned at the location pointed to by `z1Ptr'.)
149*----------------------------------------------------------------------------*/
150
a49db98d 151static inline void
158142c2 152 shift64ExtraRightJamming(
07d792d2 153 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 154{
bb98fe42 155 uint64_t z0, z1;
8f506c70 156 int8_t negCount = ( - count ) & 63;
158142c2
FB
157
158 if ( count == 0 ) {
159 z1 = a1;
160 z0 = a0;
161 }
162 else if ( count < 64 ) {
163 z1 = ( a0<<negCount ) | ( a1 != 0 );
164 z0 = a0>>count;
165 }
166 else {
167 if ( count == 64 ) {
168 z1 = a0 | ( a1 != 0 );
169 }
170 else {
171 z1 = ( ( a0 | a1 ) != 0 );
172 }
173 z0 = 0;
174 }
175 *z1Ptr = z1;
176 *z0Ptr = z0;
177
178}
179
180/*----------------------------------------------------------------------------
181| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
182| number of bits given in `count'. Any bits shifted off are lost. The value
183| of `count' can be arbitrarily large; in particular, if `count' is greater
184| than 128, the result will be 0. The result is broken into two 64-bit pieces
185| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
186*----------------------------------------------------------------------------*/
187
a49db98d 188static inline void
158142c2 189 shift128Right(
07d792d2 190 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 191{
bb98fe42 192 uint64_t z0, z1;
8f506c70 193 int8_t negCount = ( - count ) & 63;
158142c2
FB
194
195 if ( count == 0 ) {
196 z1 = a1;
197 z0 = a0;
198 }
199 else if ( count < 64 ) {
200 z1 = ( a0<<negCount ) | ( a1>>count );
201 z0 = a0>>count;
202 }
203 else {
4039736e 204 z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
158142c2
FB
205 z0 = 0;
206 }
207 *z1Ptr = z1;
208 *z0Ptr = z0;
209
210}
211
212/*----------------------------------------------------------------------------
213| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
214| number of bits given in `count'. If any nonzero bits are shifted off, they
215| are ``jammed'' into the least significant bit of the result by setting the
216| least significant bit to 1. The value of `count' can be arbitrarily large;
217| in particular, if `count' is greater than 128, the result will be either
218| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
219| nonzero. The result is broken into two 64-bit pieces which are stored at
220| the locations pointed to by `z0Ptr' and `z1Ptr'.
221*----------------------------------------------------------------------------*/
222
a49db98d 223static inline void
158142c2 224 shift128RightJamming(
07d792d2 225 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 226{
bb98fe42 227 uint64_t z0, z1;
8f506c70 228 int8_t negCount = ( - count ) & 63;
158142c2
FB
229
230 if ( count == 0 ) {
231 z1 = a1;
232 z0 = a0;
233 }
234 else if ( count < 64 ) {
235 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
236 z0 = a0>>count;
237 }
238 else {
239 if ( count == 64 ) {
240 z1 = a0 | ( a1 != 0 );
241 }
242 else if ( count < 128 ) {
243 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
244 }
245 else {
246 z1 = ( ( a0 | a1 ) != 0 );
247 }
248 z0 = 0;
249 }
250 *z1Ptr = z1;
251 *z0Ptr = z0;
252
253}
254
255/*----------------------------------------------------------------------------
256| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
257| by 64 _plus_ the number of bits given in `count'. The shifted result is
258| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
259| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
260| off form a third 64-bit result as follows: The _last_ bit shifted off is
261| the most-significant bit of the extra result, and the other 63 bits of the
262| extra result are all zero if and only if _all_but_the_last_ bits shifted off
263| were all zero. This extra result is stored in the location pointed to by
264| `z2Ptr'. The value of `count' can be arbitrarily large.
265| (This routine makes more sense if `a0', `a1', and `a2' are considered
266| to form a fixed-point value with binary point between `a1' and `a2'. This
267| fixed-point value is shifted right by the number of bits given in `count',
268| and the integer part of the result is returned at the locations pointed to
269| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
270| corrupted as described above, and is returned at the location pointed to by
271| `z2Ptr'.)
272*----------------------------------------------------------------------------*/
273
a49db98d 274static inline void
158142c2 275 shift128ExtraRightJamming(
bb98fe42
AF
276 uint64_t a0,
277 uint64_t a1,
278 uint64_t a2,
07d792d2 279 int count,
bb98fe42
AF
280 uint64_t *z0Ptr,
281 uint64_t *z1Ptr,
282 uint64_t *z2Ptr
158142c2
FB
283 )
284{
bb98fe42 285 uint64_t z0, z1, z2;
8f506c70 286 int8_t negCount = ( - count ) & 63;
158142c2
FB
287
288 if ( count == 0 ) {
289 z2 = a2;
290 z1 = a1;
291 z0 = a0;
292 }
293 else {
294 if ( count < 64 ) {
295 z2 = a1<<negCount;
296 z1 = ( a0<<negCount ) | ( a1>>count );
297 z0 = a0>>count;
298 }
299 else {
300 if ( count == 64 ) {
301 z2 = a1;
302 z1 = a0;
303 }
304 else {
305 a2 |= a1;
306 if ( count < 128 ) {
307 z2 = a0<<negCount;
308 z1 = a0>>( count & 63 );
309 }
310 else {
311 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
312 z1 = 0;
313 }
314 }
315 z0 = 0;
316 }
317 z2 |= ( a2 != 0 );
318 }
319 *z2Ptr = z2;
320 *z1Ptr = z1;
321 *z0Ptr = z0;
322
323}
324
325/*----------------------------------------------------------------------------
326| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
327| number of bits given in `count'. Any bits shifted off are lost. The value
328| of `count' must be less than 64. The result is broken into two 64-bit
329| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
330*----------------------------------------------------------------------------*/
331
5dfbc9e4
RH
332static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
333 uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 334{
5dfbc9e4
RH
335 *z1Ptr = a1 << count;
336 *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
337}
158142c2 338
5dfbc9e4
RH
339/*----------------------------------------------------------------------------
340| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
341| number of bits given in `count'. Any bits shifted off are lost. The value
342| of `count' may be greater than 64. The result is broken into two 64-bit
343| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
344*----------------------------------------------------------------------------*/
158142c2 345
5dfbc9e4
RH
346static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
347 uint64_t *z0Ptr, uint64_t *z1Ptr)
348{
349 if (count < 64) {
350 *z1Ptr = a1 << count;
351 *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
352 } else {
353 *z1Ptr = 0;
354 *z0Ptr = a1 << (count - 64);
355 }
158142c2
FB
356}
357
358/*----------------------------------------------------------------------------
359| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
360| by the number of bits given in `count'. Any bits shifted off are lost.
361| The value of `count' must be less than 64. The result is broken into three
362| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
363| `z1Ptr', and `z2Ptr'.
364*----------------------------------------------------------------------------*/
365
a49db98d 366static inline void
158142c2 367 shortShift192Left(
bb98fe42
AF
368 uint64_t a0,
369 uint64_t a1,
370 uint64_t a2,
07d792d2 371 int count,
bb98fe42
AF
372 uint64_t *z0Ptr,
373 uint64_t *z1Ptr,
374 uint64_t *z2Ptr
158142c2
FB
375 )
376{
bb98fe42 377 uint64_t z0, z1, z2;
8f506c70 378 int8_t negCount;
158142c2
FB
379
380 z2 = a2<<count;
381 z1 = a1<<count;
382 z0 = a0<<count;
383 if ( 0 < count ) {
384 negCount = ( ( - count ) & 63 );
385 z1 |= a2>>negCount;
386 z0 |= a1>>negCount;
387 }
388 *z2Ptr = z2;
389 *z1Ptr = z1;
390 *z0Ptr = z0;
391
392}
393
394/*----------------------------------------------------------------------------
395| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
396| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
397| any carry out is lost. The result is broken into two 64-bit pieces which
398| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
399*----------------------------------------------------------------------------*/
400
a49db98d 401static inline void
158142c2 402 add128(
bb98fe42 403 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 404{
bb98fe42 405 uint64_t z1;
158142c2
FB
406
407 z1 = a1 + b1;
408 *z1Ptr = z1;
409 *z0Ptr = a0 + b0 + ( z1 < a1 );
410
411}
412
413/*----------------------------------------------------------------------------
414| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
415| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
416| modulo 2^192, so any carry out is lost. The result is broken into three
417| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
418| `z1Ptr', and `z2Ptr'.
419*----------------------------------------------------------------------------*/
420
a49db98d 421static inline void
158142c2 422 add192(
bb98fe42
AF
423 uint64_t a0,
424 uint64_t a1,
425 uint64_t a2,
426 uint64_t b0,
427 uint64_t b1,
428 uint64_t b2,
429 uint64_t *z0Ptr,
430 uint64_t *z1Ptr,
431 uint64_t *z2Ptr
158142c2
FB
432 )
433{
bb98fe42 434 uint64_t z0, z1, z2;
8f506c70 435 int8_t carry0, carry1;
158142c2
FB
436
437 z2 = a2 + b2;
438 carry1 = ( z2 < a2 );
439 z1 = a1 + b1;
440 carry0 = ( z1 < a1 );
441 z0 = a0 + b0;
442 z1 += carry1;
443 z0 += ( z1 < carry1 );
444 z0 += carry0;
445 *z2Ptr = z2;
446 *z1Ptr = z1;
447 *z0Ptr = z0;
448
449}
450
451/*----------------------------------------------------------------------------
452| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
453| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
454| 2^128, so any borrow out (carry out) is lost. The result is broken into two
455| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
456| `z1Ptr'.
457*----------------------------------------------------------------------------*/
458
a49db98d 459static inline void
158142c2 460 sub128(
bb98fe42 461 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2
FB
462{
463
464 *z1Ptr = a1 - b1;
465 *z0Ptr = a0 - b0 - ( a1 < b1 );
466
467}
468
469/*----------------------------------------------------------------------------
470| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
471| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
472| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
473| result is broken into three 64-bit pieces which are stored at the locations
474| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
475*----------------------------------------------------------------------------*/
476
a49db98d 477static inline void
158142c2 478 sub192(
bb98fe42
AF
479 uint64_t a0,
480 uint64_t a1,
481 uint64_t a2,
482 uint64_t b0,
483 uint64_t b1,
484 uint64_t b2,
485 uint64_t *z0Ptr,
486 uint64_t *z1Ptr,
487 uint64_t *z2Ptr
158142c2
FB
488 )
489{
bb98fe42 490 uint64_t z0, z1, z2;
8f506c70 491 int8_t borrow0, borrow1;
158142c2
FB
492
493 z2 = a2 - b2;
494 borrow1 = ( a2 < b2 );
495 z1 = a1 - b1;
496 borrow0 = ( a1 < b1 );
497 z0 = a0 - b0;
498 z0 -= ( z1 < borrow1 );
499 z1 -= borrow1;
500 z0 -= borrow0;
501 *z2Ptr = z2;
502 *z1Ptr = z1;
503 *z0Ptr = z0;
504
505}
506
507/*----------------------------------------------------------------------------
508| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
509| into two 64-bit pieces which are stored at the locations pointed to by
510| `z0Ptr' and `z1Ptr'.
511*----------------------------------------------------------------------------*/
512
a49db98d 513static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 514{
bb98fe42
AF
515 uint32_t aHigh, aLow, bHigh, bLow;
516 uint64_t z0, zMiddleA, zMiddleB, z1;
158142c2
FB
517
518 aLow = a;
519 aHigh = a>>32;
520 bLow = b;
521 bHigh = b>>32;
bb98fe42
AF
522 z1 = ( (uint64_t) aLow ) * bLow;
523 zMiddleA = ( (uint64_t) aLow ) * bHigh;
524 zMiddleB = ( (uint64_t) aHigh ) * bLow;
525 z0 = ( (uint64_t) aHigh ) * bHigh;
158142c2 526 zMiddleA += zMiddleB;
bb98fe42 527 z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
158142c2
FB
528 zMiddleA <<= 32;
529 z1 += zMiddleA;
530 z0 += ( z1 < zMiddleA );
531 *z1Ptr = z1;
532 *z0Ptr = z0;
533
534}
535
536/*----------------------------------------------------------------------------
537| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
538| `b' to obtain a 192-bit product. The product is broken into three 64-bit
539| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
540| `z2Ptr'.
541*----------------------------------------------------------------------------*/
542
a49db98d 543static inline void
158142c2 544 mul128By64To192(
bb98fe42
AF
545 uint64_t a0,
546 uint64_t a1,
547 uint64_t b,
548 uint64_t *z0Ptr,
549 uint64_t *z1Ptr,
550 uint64_t *z2Ptr
158142c2
FB
551 )
552{
bb98fe42 553 uint64_t z0, z1, z2, more1;
158142c2
FB
554
555 mul64To128( a1, b, &z1, &z2 );
556 mul64To128( a0, b, &z0, &more1 );
557 add128( z0, more1, 0, z1, &z0, &z1 );
558 *z2Ptr = z2;
559 *z1Ptr = z1;
560 *z0Ptr = z0;
561
562}
563
564/*----------------------------------------------------------------------------
565| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
566| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
567| product. The product is broken into four 64-bit pieces which are stored at
568| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
569*----------------------------------------------------------------------------*/
570
a49db98d 571static inline void
158142c2 572 mul128To256(
bb98fe42
AF
573 uint64_t a0,
574 uint64_t a1,
575 uint64_t b0,
576 uint64_t b1,
577 uint64_t *z0Ptr,
578 uint64_t *z1Ptr,
579 uint64_t *z2Ptr,
580 uint64_t *z3Ptr
158142c2
FB
581 )
582{
bb98fe42
AF
583 uint64_t z0, z1, z2, z3;
584 uint64_t more1, more2;
158142c2
FB
585
586 mul64To128( a1, b1, &z2, &z3 );
587 mul64To128( a1, b0, &z1, &more2 );
588 add128( z1, more2, 0, z2, &z1, &z2 );
589 mul64To128( a0, b0, &z0, &more1 );
590 add128( z0, more1, 0, z1, &z0, &z1 );
591 mul64To128( a0, b1, &more1, &more2 );
592 add128( more1, more2, 0, z2, &more1, &z2 );
593 add128( z0, z1, 0, more1, &z0, &z1 );
594 *z3Ptr = z3;
595 *z2Ptr = z2;
596 *z1Ptr = z1;
597 *z0Ptr = z0;
598
599}
600
601/*----------------------------------------------------------------------------
602| Returns an approximation to the 64-bit integer quotient obtained by dividing
603| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
604| divisor `b' must be at least 2^63. If q is the exact quotient truncated
605| toward zero, the approximation returned lies between q and q + 2 inclusive.
606| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
607| unsigned integer is returned.
608*----------------------------------------------------------------------------*/
609
88857aca 610static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
158142c2 611{
bb98fe42
AF
612 uint64_t b0, b1;
613 uint64_t rem0, rem1, term0, term1;
614 uint64_t z;
158142c2
FB
615
616 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
617 b0 = b>>32;
618 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
619 mul64To128( b, z, &term0, &term1 );
620 sub128( a0, a1, term0, term1, &rem0, &rem1 );
bb98fe42 621 while ( ( (int64_t) rem0 ) < 0 ) {
158142c2
FB
622 z -= LIT64( 0x100000000 );
623 b1 = b<<32;
624 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
625 }
626 rem0 = ( rem0<<32 ) | ( rem1>>32 );
627 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
628 return z;
629
630}
631
cf07323d
AB
632/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
633 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
634 *
635 * Licensed under the GPLv2/LGPLv3
636 */
5dfbc9e4
RH
637static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
638 uint64_t n0, uint64_t d)
cf07323d 639{
b299e88d
RH
640#if defined(__x86_64__)
641 uint64_t q;
642 asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
643 return q;
2c00542c 644#elif defined(__s390x__) && !defined(__clang__)
739df333
RH
645 /* Need to use a TImode type to get an even register pair for DLGR. */
646 unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
647 asm("dlgr %0, %1" : "+r"(n) : "r"(d));
648 *r = n >> 64;
649 return n;
7370981b
RH
650#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
651 /* From Power ISA 2.06, programming note for divdeu. */
27ae5109
RH
652 uint64_t q1, q2, Q, r1, r2, R;
653 asm("divdeu %0,%2,%4; divdu %1,%3,%4"
654 : "=&r"(q1), "=r"(q2)
655 : "r"(n1), "r"(n0), "r"(d));
656 r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
657 r2 = n0 - (q2 * d);
658 Q = q1 + q2;
659 R = r1 + r2;
660 if (R >= d || R < r2) { /* overflow implies R > d */
661 Q += 1;
662 R -= d;
663 }
664 *r = R;
665 return Q;
b299e88d 666#else
cf07323d
AB
667 uint64_t d0, d1, q0, q1, r1, r0, m;
668
669 d0 = (uint32_t)d;
670 d1 = d >> 32;
671
672 r1 = n1 % d1;
673 q1 = n1 / d1;
674 m = q1 * d0;
675 r1 = (r1 << 32) | (n0 >> 32);
676 if (r1 < m) {
677 q1 -= 1;
678 r1 += d;
679 if (r1 >= d) {
680 if (r1 < m) {
681 q1 -= 1;
682 r1 += d;
683 }
684 }
685 }
686 r1 -= m;
687
688 r0 = r1 % d1;
689 q0 = r1 / d1;
690 m = q0 * d0;
691 r0 = (r0 << 32) | (uint32_t)n0;
692 if (r0 < m) {
693 q0 -= 1;
694 r0 += d;
695 if (r0 >= d) {
696 if (r0 < m) {
697 q0 -= 1;
698 r0 += d;
699 }
700 }
701 }
702 r0 -= m;
703
5dfbc9e4
RH
704 *r = r0;
705 return (q1 << 32) | q0;
b299e88d 706#endif
cf07323d
AB
707}
708
158142c2
FB
709/*----------------------------------------------------------------------------
710| Returns an approximation to the square root of the 32-bit significand given
711| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
712| `aExp' (the least significant bit) is 1, the integer returned approximates
713| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
714| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
715| case, the approximation returned lies strictly within +/-2 of the exact
716| value.
717*----------------------------------------------------------------------------*/
718
88857aca 719static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
158142c2 720{
bb98fe42 721 static const uint16_t sqrtOddAdjustments[] = {
158142c2
FB
722 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
723 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
724 };
bb98fe42 725 static const uint16_t sqrtEvenAdjustments[] = {
158142c2
FB
726 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
727 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
728 };
8f506c70 729 int8_t index;
bb98fe42 730 uint32_t z;
158142c2
FB
731
732 index = ( a>>27 ) & 15;
733 if ( aExp & 1 ) {
3f4cb3d3 734 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
158142c2
FB
735 z = ( ( a / z )<<14 ) + ( z<<15 );
736 a >>= 1;
737 }
738 else {
3f4cb3d3 739 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
158142c2
FB
740 z = a / z + z;
741 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
bb98fe42 742 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
158142c2 743 }
bb98fe42 744 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
158142c2
FB
745
746}
747
158142c2
FB
748/*----------------------------------------------------------------------------
749| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
750| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
751| Otherwise, returns 0.
752*----------------------------------------------------------------------------*/
753
a49db98d 754static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
755{
756
757 return ( a0 == b0 ) && ( a1 == b1 );
758
759}
760
761/*----------------------------------------------------------------------------
762| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
763| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
764| Otherwise, returns 0.
765*----------------------------------------------------------------------------*/
766
a49db98d 767static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
768{
769
770 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
771
772}
773
774/*----------------------------------------------------------------------------
775| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
776| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
777| returns 0.
778*----------------------------------------------------------------------------*/
779
a49db98d 780static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
781{
782
783 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
784
785}
786
787/*----------------------------------------------------------------------------
788| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
789| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
790| Otherwise, returns 0.
791*----------------------------------------------------------------------------*/
792
a49db98d 793static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
794{
795
796 return ( a0 != b0 ) || ( a1 != b1 );
797
798}