]> git.proxmox.com Git - qemu.git/blame - fpu/softfloat-macros.h
Merge remote-tracking branch 'pm-arm/for-upstream' into pm
[qemu.git] / fpu / softfloat-macros.h
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support macros
3 *
4 * Derived from SoftFloat.
5 */
158142c2
FB
6
7/*============================================================================
8
9This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
10Arithmetic Package, Release 2b.
11
12Written by John R. Hauser. This work was made possible in part by the
13International Computer Science Institute, located at Suite 600, 1947 Center
14Street, Berkeley, California 94704. Funding was partially provided by the
15National Science Foundation under grant MIP-9311980. The original version
16of this code was written as part of a project to build a fixed-point vector
17processor in collaboration with the University of California at Berkeley,
18overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20arithmetic/SoftFloat.html'.
21
22THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
29OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30
31Derivative works are acceptable, even for commercial purposes, so long as
32(1) the source code for the derivative work includes prominent notice that
33the work is derivative, and (2) the source code includes prominent notice with
34these four paragraphs for those parts of this code that are retained.
35
36=============================================================================*/
37
b3b4c7f3
AJ
38/*----------------------------------------------------------------------------
39| This macro tests for minimum version of the GNU C compiler.
40*----------------------------------------------------------------------------*/
/*
 * SOFTFLOAT_GNUC_PREREQ(maj, min) is nonzero when the compiler identifies
 * itself as GNU C version maj.min or newer.  Major and minor numbers are
 * packed into one integer (major in the upper bits) so that a single
 * comparison orders versions.  Compilers that do not define both
 * __GNUC__ and __GNUC_MINOR__ always get 0.
 */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
    (((__GNUC__ << 16) + __GNUC_MINOR__) >= (((maj) << 16) + (min)))
#else
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#endif
47
48
158142c2
FB
49/*----------------------------------------------------------------------------
50| Shifts `a' right by the number of bits given in `count'. If any nonzero
51| bits are shifted off, they are ``jammed'' into the least significant bit of
52| the result by setting the least significant bit to 1. The value of `count'
53| can be arbitrarily large; in particular, if `count' is greater than 32, the
54| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
55| The result is stored in the location pointed to by `zPtr'.
56*----------------------------------------------------------------------------*/
57
bb98fe42 58INLINE void shift32RightJamming( uint32_t a, int16 count, uint32_t *zPtr )
158142c2 59{
bb98fe42 60 uint32_t z;
158142c2
FB
61
62 if ( count == 0 ) {
63 z = a;
64 }
65 else if ( count < 32 ) {
66 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
67 }
68 else {
69 z = ( a != 0 );
70 }
71 *zPtr = z;
72
73}
74
75/*----------------------------------------------------------------------------
76| Shifts `a' right by the number of bits given in `count'. If any nonzero
77| bits are shifted off, they are ``jammed'' into the least significant bit of
78| the result by setting the least significant bit to 1. The value of `count'
79| can be arbitrarily large; in particular, if `count' is greater than 64, the
80| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
81| The result is stored in the location pointed to by `zPtr'.
82*----------------------------------------------------------------------------*/
83
bb98fe42 84INLINE void shift64RightJamming( uint64_t a, int16 count, uint64_t *zPtr )
158142c2 85{
bb98fe42 86 uint64_t z;
158142c2
FB
87
88 if ( count == 0 ) {
89 z = a;
90 }
91 else if ( count < 64 ) {
92 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
93 }
94 else {
95 z = ( a != 0 );
96 }
97 *zPtr = z;
98
99}
100
101/*----------------------------------------------------------------------------
102| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
103| _plus_ the number of bits given in `count'. The shifted result is at most
104| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
105| bits shifted off form a second 64-bit result as follows: The _last_ bit
106| shifted off is the most-significant bit of the extra result, and the other
107| 63 bits of the extra result are all zero if and only if _all_but_the_last_
108| bits shifted off were all zero. This extra result is stored in the location
109| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
110| (This routine makes more sense if `a0' and `a1' are considered to form
111| a fixed-point value with binary point between `a0' and `a1'. This fixed-
112| point value is shifted right by the number of bits given in `count', and
113| the integer part of the result is returned at the location pointed to by
114| `z0Ptr'. The fractional part of the result may be slightly corrupted as
115| described above, and is returned at the location pointed to by `z1Ptr'.)
116*----------------------------------------------------------------------------*/
117
118INLINE void
119 shift64ExtraRightJamming(
bb98fe42 120 uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 121{
bb98fe42 122 uint64_t z0, z1;
158142c2
FB
123 int8 negCount = ( - count ) & 63;
124
125 if ( count == 0 ) {
126 z1 = a1;
127 z0 = a0;
128 }
129 else if ( count < 64 ) {
130 z1 = ( a0<<negCount ) | ( a1 != 0 );
131 z0 = a0>>count;
132 }
133 else {
134 if ( count == 64 ) {
135 z1 = a0 | ( a1 != 0 );
136 }
137 else {
138 z1 = ( ( a0 | a1 ) != 0 );
139 }
140 z0 = 0;
141 }
142 *z1Ptr = z1;
143 *z0Ptr = z0;
144
145}
146
147/*----------------------------------------------------------------------------
148| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
149| number of bits given in `count'. Any bits shifted off are lost. The value
150| of `count' can be arbitrarily large; in particular, if `count' is greater
151| than 128, the result will be 0. The result is broken into two 64-bit pieces
152| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
153*----------------------------------------------------------------------------*/
154
155INLINE void
156 shift128Right(
bb98fe42 157 uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 158{
bb98fe42 159 uint64_t z0, z1;
158142c2
FB
160 int8 negCount = ( - count ) & 63;
161
162 if ( count == 0 ) {
163 z1 = a1;
164 z0 = a0;
165 }
166 else if ( count < 64 ) {
167 z1 = ( a0<<negCount ) | ( a1>>count );
168 z0 = a0>>count;
169 }
170 else {
171 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
172 z0 = 0;
173 }
174 *z1Ptr = z1;
175 *z0Ptr = z0;
176
177}
178
179/*----------------------------------------------------------------------------
180| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
181| number of bits given in `count'. If any nonzero bits are shifted off, they
182| are ``jammed'' into the least significant bit of the result by setting the
183| least significant bit to 1. The value of `count' can be arbitrarily large;
184| in particular, if `count' is greater than 128, the result will be either
185| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
186| nonzero. The result is broken into two 64-bit pieces which are stored at
187| the locations pointed to by `z0Ptr' and `z1Ptr'.
188*----------------------------------------------------------------------------*/
189
190INLINE void
191 shift128RightJamming(
bb98fe42 192 uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 193{
bb98fe42 194 uint64_t z0, z1;
158142c2
FB
195 int8 negCount = ( - count ) & 63;
196
197 if ( count == 0 ) {
198 z1 = a1;
199 z0 = a0;
200 }
201 else if ( count < 64 ) {
202 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
203 z0 = a0>>count;
204 }
205 else {
206 if ( count == 64 ) {
207 z1 = a0 | ( a1 != 0 );
208 }
209 else if ( count < 128 ) {
210 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
211 }
212 else {
213 z1 = ( ( a0 | a1 ) != 0 );
214 }
215 z0 = 0;
216 }
217 *z1Ptr = z1;
218 *z0Ptr = z0;
219
220}
221
222/*----------------------------------------------------------------------------
223| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
224| by 64 _plus_ the number of bits given in `count'. The shifted result is
225| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
226| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
227| off form a third 64-bit result as follows: The _last_ bit shifted off is
228| the most-significant bit of the extra result, and the other 63 bits of the
229| extra result are all zero if and only if _all_but_the_last_ bits shifted off
230| were all zero. This extra result is stored in the location pointed to by
231| `z2Ptr'. The value of `count' can be arbitrarily large.
232| (This routine makes more sense if `a0', `a1', and `a2' are considered
233| to form a fixed-point value with binary point between `a1' and `a2'. This
234| fixed-point value is shifted right by the number of bits given in `count',
235| and the integer part of the result is returned at the locations pointed to
236| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
237| corrupted as described above, and is returned at the location pointed to by
238| `z2Ptr'.)
239*----------------------------------------------------------------------------*/
240
241INLINE void
242 shift128ExtraRightJamming(
bb98fe42
AF
243 uint64_t a0,
244 uint64_t a1,
245 uint64_t a2,
158142c2 246 int16 count,
bb98fe42
AF
247 uint64_t *z0Ptr,
248 uint64_t *z1Ptr,
249 uint64_t *z2Ptr
158142c2
FB
250 )
251{
bb98fe42 252 uint64_t z0, z1, z2;
158142c2
FB
253 int8 negCount = ( - count ) & 63;
254
255 if ( count == 0 ) {
256 z2 = a2;
257 z1 = a1;
258 z0 = a0;
259 }
260 else {
261 if ( count < 64 ) {
262 z2 = a1<<negCount;
263 z1 = ( a0<<negCount ) | ( a1>>count );
264 z0 = a0>>count;
265 }
266 else {
267 if ( count == 64 ) {
268 z2 = a1;
269 z1 = a0;
270 }
271 else {
272 a2 |= a1;
273 if ( count < 128 ) {
274 z2 = a0<<negCount;
275 z1 = a0>>( count & 63 );
276 }
277 else {
278 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
279 z1 = 0;
280 }
281 }
282 z0 = 0;
283 }
284 z2 |= ( a2 != 0 );
285 }
286 *z2Ptr = z2;
287 *z1Ptr = z1;
288 *z0Ptr = z0;
289
290}
291
292/*----------------------------------------------------------------------------
293| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
294| number of bits given in `count'. Any bits shifted off are lost. The value
295| of `count' must be less than 64. The result is broken into two 64-bit
296| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
297*----------------------------------------------------------------------------*/
298
299INLINE void
300 shortShift128Left(
bb98fe42 301 uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2
FB
302{
303
304 *z1Ptr = a1<<count;
305 *z0Ptr =
306 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
307
308}
309
310/*----------------------------------------------------------------------------
311| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
312| by the number of bits given in `count'. Any bits shifted off are lost.
313| The value of `count' must be less than 64. The result is broken into three
314| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
315| `z1Ptr', and `z2Ptr'.
316*----------------------------------------------------------------------------*/
317
318INLINE void
319 shortShift192Left(
bb98fe42
AF
320 uint64_t a0,
321 uint64_t a1,
322 uint64_t a2,
158142c2 323 int16 count,
bb98fe42
AF
324 uint64_t *z0Ptr,
325 uint64_t *z1Ptr,
326 uint64_t *z2Ptr
158142c2
FB
327 )
328{
bb98fe42 329 uint64_t z0, z1, z2;
158142c2
FB
330 int8 negCount;
331
332 z2 = a2<<count;
333 z1 = a1<<count;
334 z0 = a0<<count;
335 if ( 0 < count ) {
336 negCount = ( ( - count ) & 63 );
337 z1 |= a2>>negCount;
338 z0 |= a1>>negCount;
339 }
340 *z2Ptr = z2;
341 *z1Ptr = z1;
342 *z0Ptr = z0;
343
344}
345
346/*----------------------------------------------------------------------------
347| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
348| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
349| any carry out is lost. The result is broken into two 64-bit pieces which
350| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
351*----------------------------------------------------------------------------*/
352
353INLINE void
354 add128(
bb98fe42 355 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 356{
bb98fe42 357 uint64_t z1;
158142c2
FB
358
359 z1 = a1 + b1;
360 *z1Ptr = z1;
361 *z0Ptr = a0 + b0 + ( z1 < a1 );
362
363}
364
365/*----------------------------------------------------------------------------
366| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
367| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
368| modulo 2^192, so any carry out is lost. The result is broken into three
369| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
370| `z1Ptr', and `z2Ptr'.
371*----------------------------------------------------------------------------*/
372
373INLINE void
374 add192(
bb98fe42
AF
375 uint64_t a0,
376 uint64_t a1,
377 uint64_t a2,
378 uint64_t b0,
379 uint64_t b1,
380 uint64_t b2,
381 uint64_t *z0Ptr,
382 uint64_t *z1Ptr,
383 uint64_t *z2Ptr
158142c2
FB
384 )
385{
bb98fe42 386 uint64_t z0, z1, z2;
158142c2
FB
387 int8 carry0, carry1;
388
389 z2 = a2 + b2;
390 carry1 = ( z2 < a2 );
391 z1 = a1 + b1;
392 carry0 = ( z1 < a1 );
393 z0 = a0 + b0;
394 z1 += carry1;
395 z0 += ( z1 < carry1 );
396 z0 += carry0;
397 *z2Ptr = z2;
398 *z1Ptr = z1;
399 *z0Ptr = z0;
400
401}
402
403/*----------------------------------------------------------------------------
404| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
405| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
406| 2^128, so any borrow out (carry out) is lost. The result is broken into two
407| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
408| `z1Ptr'.
409*----------------------------------------------------------------------------*/
410
411INLINE void
412 sub128(
bb98fe42 413 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2
FB
414{
415
416 *z1Ptr = a1 - b1;
417 *z0Ptr = a0 - b0 - ( a1 < b1 );
418
419}
420
421/*----------------------------------------------------------------------------
422| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
423| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
424| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
425| result is broken into three 64-bit pieces which are stored at the locations
426| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
427*----------------------------------------------------------------------------*/
428
429INLINE void
430 sub192(
bb98fe42
AF
431 uint64_t a0,
432 uint64_t a1,
433 uint64_t a2,
434 uint64_t b0,
435 uint64_t b1,
436 uint64_t b2,
437 uint64_t *z0Ptr,
438 uint64_t *z1Ptr,
439 uint64_t *z2Ptr
158142c2
FB
440 )
441{
bb98fe42 442 uint64_t z0, z1, z2;
158142c2
FB
443 int8 borrow0, borrow1;
444
445 z2 = a2 - b2;
446 borrow1 = ( a2 < b2 );
447 z1 = a1 - b1;
448 borrow0 = ( a1 < b1 );
449 z0 = a0 - b0;
450 z0 -= ( z1 < borrow1 );
451 z1 -= borrow1;
452 z0 -= borrow0;
453 *z2Ptr = z2;
454 *z1Ptr = z1;
455 *z0Ptr = z0;
456
457}
458
459/*----------------------------------------------------------------------------
460| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
461| into two 64-bit pieces which are stored at the locations pointed to by
462| `z0Ptr' and `z1Ptr'.
463*----------------------------------------------------------------------------*/
464
bb98fe42 465INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
158142c2 466{
bb98fe42
AF
467 uint32_t aHigh, aLow, bHigh, bLow;
468 uint64_t z0, zMiddleA, zMiddleB, z1;
158142c2
FB
469
470 aLow = a;
471 aHigh = a>>32;
472 bLow = b;
473 bHigh = b>>32;
bb98fe42
AF
474 z1 = ( (uint64_t) aLow ) * bLow;
475 zMiddleA = ( (uint64_t) aLow ) * bHigh;
476 zMiddleB = ( (uint64_t) aHigh ) * bLow;
477 z0 = ( (uint64_t) aHigh ) * bHigh;
158142c2 478 zMiddleA += zMiddleB;
bb98fe42 479 z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
158142c2
FB
480 zMiddleA <<= 32;
481 z1 += zMiddleA;
482 z0 += ( z1 < zMiddleA );
483 *z1Ptr = z1;
484 *z0Ptr = z0;
485
486}
487
488/*----------------------------------------------------------------------------
489| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
490| `b' to obtain a 192-bit product. The product is broken into three 64-bit
491| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
492| `z2Ptr'.
493*----------------------------------------------------------------------------*/
494
495INLINE void
496 mul128By64To192(
bb98fe42
AF
497 uint64_t a0,
498 uint64_t a1,
499 uint64_t b,
500 uint64_t *z0Ptr,
501 uint64_t *z1Ptr,
502 uint64_t *z2Ptr
158142c2
FB
503 )
504{
bb98fe42 505 uint64_t z0, z1, z2, more1;
158142c2
FB
506
507 mul64To128( a1, b, &z1, &z2 );
508 mul64To128( a0, b, &z0, &more1 );
509 add128( z0, more1, 0, z1, &z0, &z1 );
510 *z2Ptr = z2;
511 *z1Ptr = z1;
512 *z0Ptr = z0;
513
514}
515
516/*----------------------------------------------------------------------------
517| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
518| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
519| product. The product is broken into four 64-bit pieces which are stored at
520| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
521*----------------------------------------------------------------------------*/
522
523INLINE void
524 mul128To256(
bb98fe42
AF
525 uint64_t a0,
526 uint64_t a1,
527 uint64_t b0,
528 uint64_t b1,
529 uint64_t *z0Ptr,
530 uint64_t *z1Ptr,
531 uint64_t *z2Ptr,
532 uint64_t *z3Ptr
158142c2
FB
533 )
534{
bb98fe42
AF
535 uint64_t z0, z1, z2, z3;
536 uint64_t more1, more2;
158142c2
FB
537
538 mul64To128( a1, b1, &z2, &z3 );
539 mul64To128( a1, b0, &z1, &more2 );
540 add128( z1, more2, 0, z2, &z1, &z2 );
541 mul64To128( a0, b0, &z0, &more1 );
542 add128( z0, more1, 0, z1, &z0, &z1 );
543 mul64To128( a0, b1, &more1, &more2 );
544 add128( more1, more2, 0, z2, &more1, &z2 );
545 add128( z0, z1, 0, more1, &z0, &z1 );
546 *z3Ptr = z3;
547 *z2Ptr = z2;
548 *z1Ptr = z1;
549 *z0Ptr = z0;
550
551}
552
553/*----------------------------------------------------------------------------
554| Returns an approximation to the 64-bit integer quotient obtained by dividing
555| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
556| divisor `b' must be at least 2^63. If q is the exact quotient truncated
557| toward zero, the approximation returned lies between q and q + 2 inclusive.
558| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
559| unsigned integer is returned.
560*----------------------------------------------------------------------------*/
561
static uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
{
    /*
     * Two-step estimate of (a0:a1) / b: the upper 32 quotient bits come
     * from dividing by the top half of b and are corrected downward until
     * the remainder is non-negative; the lower 32 bits are then estimated
     * from that remainder.
     */
    const uint64_t bTop = b >> 32;
    const uint64_t bLowShifted = b << 32;
    uint64_t quotient;
    uint64_t rem0, rem1, prod0, prod1;

    /* The quotient does not fit in 64 bits: saturate. */
    if (b <= a0) {
        return LIT64(0xFFFFFFFFFFFFFFFF);
    }

    if ((bTop << 32) <= a0) {
        quotient = LIT64(0xFFFFFFFF00000000);
    } else {
        quotient = (a0 / bTop) << 32;
    }

    mul64To128(b, quotient, &prod0, &prod1);
    sub128(a0, a1, prod0, prod1, &rem0, &rem1);

    /* Too large a guess leaves a negative remainder: step down, add b<<32. */
    while (((int64_t) rem0) < 0) {
        quotient -= LIT64(0x100000000);
        add128(rem0, rem1, bTop, bLowShifted, &rem0, &rem1);
    }

    rem0 = (rem0 << 32) | (rem1 >> 32);
    if ((bTop << 32) <= rem0) {
        quotient |= 0xFFFFFFFF;
    } else {
        quotient |= rem0 / bTop;
    }
    return quotient;
}
583
584/*----------------------------------------------------------------------------
585| Returns an approximation to the square root of the 32-bit significand given
586| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
587| `aExp' (the least significant bit) is 1, the integer returned approximates
588| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
589| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
590| case, the approximation returned lies strictly within +/-2 of the exact
591| value.
592*----------------------------------------------------------------------------*/
593
bb98fe42 594static uint32_t estimateSqrt32( int16 aExp, uint32_t a )
158142c2 595{
bb98fe42 596 static const uint16_t sqrtOddAdjustments[] = {
158142c2
FB
597 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
598 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
599 };
bb98fe42 600 static const uint16_t sqrtEvenAdjustments[] = {
158142c2
FB
601 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
602 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
603 };
604 int8 index;
bb98fe42 605 uint32_t z;
158142c2
FB
606
607 index = ( a>>27 ) & 15;
608 if ( aExp & 1 ) {
3f4cb3d3 609 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
158142c2
FB
610 z = ( ( a / z )<<14 ) + ( z<<15 );
611 a >>= 1;
612 }
613 else {
3f4cb3d3 614 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
158142c2
FB
615 z = a / z + z;
616 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
bb98fe42 617 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
158142c2 618 }
bb98fe42 619 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
158142c2
FB
620
621}
622
623/*----------------------------------------------------------------------------
624| Returns the number of leading 0 bits before the most-significant 1 bit of
625| `a'. If `a' is zero, 32 is returned.
626*----------------------------------------------------------------------------*/
627
bb98fe42 628static int8 countLeadingZeros32( uint32_t a )
158142c2 629{
b3b4c7f3
AJ
630#if SOFTFLOAT_GNUC_PREREQ(3, 4)
631 if (a) {
632 return __builtin_clz(a);
633 } else {
634 return 32;
635 }
636#else
158142c2
FB
637 static const int8 countLeadingZerosHigh[] = {
638 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
639 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
640 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
641 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
642 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
645 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
654 };
655 int8 shiftCount;
656
657 shiftCount = 0;
658 if ( a < 0x10000 ) {
659 shiftCount += 16;
660 a <<= 16;
661 }
662 if ( a < 0x1000000 ) {
663 shiftCount += 8;
664 a <<= 8;
665 }
666 shiftCount += countLeadingZerosHigh[ a>>24 ];
667 return shiftCount;
b3b4c7f3 668#endif
158142c2
FB
669}
670
671/*----------------------------------------------------------------------------
672| Returns the number of leading 0 bits before the most-significant 1 bit of
673| `a'. If `a' is zero, 64 is returned.
674*----------------------------------------------------------------------------*/
675
bb98fe42 676static int8 countLeadingZeros64( uint64_t a )
158142c2 677{
b3b4c7f3
AJ
678#if SOFTFLOAT_GNUC_PREREQ(3, 4)
679 if (a) {
680 return __builtin_clzll(a);
681 } else {
682 return 64;
683 }
684#else
158142c2
FB
685 int8 shiftCount;
686
687 shiftCount = 0;
bb98fe42 688 if ( a < ( (uint64_t) 1 )<<32 ) {
158142c2
FB
689 shiftCount += 32;
690 }
691 else {
692 a >>= 32;
693 }
694 shiftCount += countLeadingZeros32( a );
695 return shiftCount;
b3b4c7f3 696#endif
158142c2
FB
697}
698
699/*----------------------------------------------------------------------------
700| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
701| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
702| Otherwise, returns 0.
703*----------------------------------------------------------------------------*/
704
bb98fe42 705INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
706{
707
708 return ( a0 == b0 ) && ( a1 == b1 );
709
710}
711
712/*----------------------------------------------------------------------------
713| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
714| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
715| Otherwise, returns 0.
716*----------------------------------------------------------------------------*/
717
bb98fe42 718INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
719{
720
721 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
722
723}
724
725/*----------------------------------------------------------------------------
726| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
727| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
728| returns 0.
729*----------------------------------------------------------------------------*/
730
bb98fe42 731INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
732{
733
734 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
735
736}
737
738/*----------------------------------------------------------------------------
739| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
740| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
741| Otherwise, returns 0.
742*----------------------------------------------------------------------------*/
743
bb98fe42 744INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
158142c2
FB
745{
746
747 return ( a0 != b0 ) || ( a1 != b1 );
748
749}