]>
Commit | Line | Data |
---|---|---|
92a42be0 SL |
1 | //= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is dual licensed under the MIT and the University of Illinois Open | |
6 | // Source Licenses. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file implements a fairly generic conversion from a wider to a narrower | |
11 | // IEEE-754 floating-point type in the default (round to nearest, ties to even) | |
12 | // rounding mode. The constants and types defined following the includes below | |
13 | // parameterize the conversion. | |
14 | // | |
15 | // This routine can be trivially adapted to support conversions to | |
16 | // half-precision or from quad-precision. It does not support types that don't | |
17 | // use the usual IEEE-754 interchange formats; specifically, some work would be | |
18 | // needed to adapt it to (for example) the Intel 80-bit format or PowerPC | |
19 | // double-double format. | |
20 | // | |
21 | // Note, however, that this implementation is only intended to support | |
22 | // *narrowing* operations; if you need to convert to a *wider* floating-point | |
23 | // type (e.g. float -> double), then this routine will not do what you want it | |
24 | // to. | |
25 | // | |
26 | // It also requires that integer types at least as large as both formats | |
27 | // are available on the target platform; this may pose a problem when trying | |
28 | // to add support for quad on some 32-bit systems, for example. | |
29 | // | |
30 | // Finally, the following assumptions are made: | |
31 | // | |
32 | // 1. floating-point types and integer types have the same endianness on the | |
33 | // target platform | |
34 | // | |
35 | // 2. quiet NaNs, if supported, are indicated by the leading bit of the | |
36 | // significand field being set | |
37 | // | |
38 | //===----------------------------------------------------------------------===// | |
39 | ||
40 | #include "fp_trunc.h" | |
41 | ||
42 | static __inline dst_t __truncXfYf2__(src_t a) { | |
43 | // Various constants whose values follow from the type parameters. | |
44 | // Any reasonable optimizer will fold and propagate all of these. | |
45 | const int srcBits = sizeof(src_t)*CHAR_BIT; | |
46 | const int srcExpBits = srcBits - srcSigBits - 1; | |
47 | const int srcInfExp = (1 << srcExpBits) - 1; | |
48 | const int srcExpBias = srcInfExp >> 1; | |
49 | ||
50 | const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; | |
51 | const src_rep_t srcSignificandMask = srcMinNormal - 1; | |
52 | const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; | |
53 | const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); | |
54 | const src_rep_t srcAbsMask = srcSignMask - 1; | |
55 | const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; | |
56 | const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); | |
57 | const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); | |
58 | const src_rep_t srcNaNCode = srcQNaN - 1; | |
59 | ||
60 | const int dstBits = sizeof(dst_t)*CHAR_BIT; | |
61 | const int dstExpBits = dstBits - dstSigBits - 1; | |
62 | const int dstInfExp = (1 << dstExpBits) - 1; | |
63 | const int dstExpBias = dstInfExp >> 1; | |
64 | ||
65 | const int underflowExponent = srcExpBias + 1 - dstExpBias; | |
66 | const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; | |
67 | const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; | |
68 | const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; | |
69 | ||
70 | const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); | |
71 | const dst_rep_t dstNaNCode = dstQNaN - 1; | |
72 | ||
73 | // Break a into a sign and representation of the absolute value | |
74 | const src_rep_t aRep = srcToRep(a); | |
75 | const src_rep_t aAbs = aRep & srcAbsMask; | |
76 | const src_rep_t sign = aRep & srcSignMask; | |
77 | dst_rep_t absResult; | |
78 | ||
79 | if (aAbs - underflow < aAbs - overflow) { | |
80 | // The exponent of a is within the range of normal numbers in the | |
81 | // destination format. We can convert by simply right-shifting with | |
82 | // rounding and adjusting the exponent. | |
83 | absResult = aAbs >> (srcSigBits - dstSigBits); | |
84 | absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; | |
85 | ||
86 | const src_rep_t roundBits = aAbs & roundMask; | |
87 | // Round to nearest | |
88 | if (roundBits > halfway) | |
89 | absResult++; | |
90 | // Ties to even | |
91 | else if (roundBits == halfway) | |
92 | absResult += absResult & 1; | |
93 | } | |
94 | else if (aAbs > srcInfinity) { | |
95 | // a is NaN. | |
96 | // Conjure the result by beginning with infinity, setting the qNaN | |
97 | // bit and inserting the (truncated) trailing NaN field. | |
98 | absResult = (dst_rep_t)dstInfExp << dstSigBits; | |
99 | absResult |= dstQNaN; | |
100 | absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; | |
101 | } | |
102 | else if (aAbs >= overflow) { | |
103 | // a overflows to infinity. | |
104 | absResult = (dst_rep_t)dstInfExp << dstSigBits; | |
105 | } | |
106 | else { | |
107 | // a underflows on conversion to the destination type or is an exact | |
108 | // zero. The result may be a denormal or zero. Extract the exponent | |
109 | // to get the shift amount for the denormalization. | |
110 | const int aExp = aAbs >> srcSigBits; | |
111 | const int shift = srcExpBias - dstExpBias - aExp + 1; | |
112 | ||
113 | const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; | |
114 | ||
115 | // Right shift by the denormalization amount with sticky. | |
116 | if (shift > srcSigBits) { | |
117 | absResult = 0; | |
118 | } else { | |
119 | const bool sticky = significand << (srcBits - shift); | |
120 | src_rep_t denormalizedSignificand = significand >> shift | sticky; | |
121 | absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); | |
122 | const src_rep_t roundBits = denormalizedSignificand & roundMask; | |
123 | // Round to nearest | |
124 | if (roundBits > halfway) | |
125 | absResult++; | |
126 | // Ties to even | |
127 | else if (roundBits == halfway) | |
128 | absResult += absResult & 1; | |
129 | } | |
130 | } | |
131 | ||
132 | // Apply the signbit to (dst_t)abs(a). | |
133 | const dst_rep_t result = absResult | sign >> (srcBits - dstBits); | |
134 | return dstFromRep(result); | |
135 | } |