]>
Commit | Line | Data |
---|---|---|
d799c028 HL |
1 | /**\r |
2 | University of Illinois/NCSA\r | |
3 | Open Source License\r | |
4 | \r | |
5 | Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT\r | |
6 | \r | |
7 | All rights reserved.\r | |
8 | \r | |
9 | Developed by:\r | |
10 | \r | |
11 | LLVM Team\r | |
12 | \r | |
13 | University of Illinois at Urbana-Champaign\r | |
14 | \r | |
15 | http://llvm.org\r | |
16 | \r | |
17 | Permission is hereby granted, free of charge, to any person obtaining a copy of\r | |
18 | this software and associated documentation files (the "Software"), to deal with\r | |
19 | the Software without restriction, including without limitation the rights to\r | |
20 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies\r | |
21 | of the Software, and to permit persons to whom the Software is furnished to do\r | |
22 | so, subject to the following conditions:\r | |
23 | \r | |
24 | * Redistributions of source code must retain the above copyright notice,\r | |
25 | this list of conditions and the following disclaimers.\r | |
26 | \r | |
27 | * Redistributions in binary form must reproduce the above copyright notice,\r | |
28 | this list of conditions and the following disclaimers in the\r | |
29 | documentation and/or other materials provided with the distribution.\r | |
30 | \r | |
31 | * Neither the names of the LLVM Team, University of Illinois at\r | |
32 | Urbana-Champaign, nor the names of its contributors may be used to\r | |
33 | endorse or promote products derived from this Software without specific\r | |
34 | prior written permission.\r | |
35 | \r | |
36 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r | |
37 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS\r | |
38 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\r | |
39 | CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\r | |
40 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\r | |
41 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE\r | |
42 | SOFTWARE.\r | |
43 | **/\r | |
44 | \r | |
45 | #ifndef FP_LIB_HEADER\r | |
46 | #define FP_LIB_HEADER\r | |
47 | \r | |
48 | #include <stdint.h>\r | |
49 | #include <stdbool.h>\r | |
50 | #include <limits.h>\r | |
51 | #include "int_lib.h"\r | |
52 | \r | |
53 | #if defined SINGLE_PRECISION\r | |
54 | \r | |
55 | typedef uint32_t rep_t;\r | |
56 | typedef int32_t srep_t;\r | |
57 | typedef float fp_t;\r | |
58 | #define REP_C UINT32_C\r | |
59 | #define significandBits 23\r | |
60 | \r | |
61 | static inline int rep_clz(rep_t a) {\r | |
62 | return __builtin_clz(a);\r | |
63 | }\r | |
64 | \r | |
65 | // 32x32 --> 64 bit multiply\r | |
66 | static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {\r | |
67 | const uint64_t product = (uint64_t)a*b;\r | |
68 | *hi = product >> 32;\r | |
69 | *lo = product;\r | |
70 | }\r | |
71 | COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);\r | |
72 | \r | |
73 | #elif defined DOUBLE_PRECISION\r | |
74 | \r | |
75 | typedef uint64_t rep_t;\r | |
76 | typedef int64_t srep_t;\r | |
77 | typedef double fp_t;\r | |
78 | #define REP_C UINT64_C\r | |
79 | #define significandBits 52\r | |
80 | \r | |
81 | static inline int rep_clz(rep_t a) {\r | |
82 | #if defined __LP64__\r | |
83 | return __builtin_clzl(a);\r | |
84 | #else\r | |
85 | if (a & REP_C(0xffffffff00000000))\r | |
86 | return __builtin_clz(a >> 32);\r | |
87 | else\r | |
88 | return 32 + __builtin_clz(a & REP_C(0xffffffff));\r | |
89 | #endif\r | |
90 | }\r | |
91 | \r | |
92 | #define loWord(a) (a & 0xffffffffU)\r | |
93 | #define hiWord(a) (a >> 32)\r | |
94 | \r | |
95 | // 64x64 -> 128 wide multiply for platforms that don't have such an operation;\r | |
96 | // many 64-bit platforms have this operation, but they tend to have hardware\r | |
97 | // floating-point, so we don't bother with a special case for them here.\r | |
98 | static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {\r | |
99 | // Each of the component 32x32 -> 64 products\r | |
100 | const uint64_t plolo = loWord(a) * loWord(b);\r | |
101 | const uint64_t plohi = loWord(a) * hiWord(b);\r | |
102 | const uint64_t philo = hiWord(a) * loWord(b);\r | |
103 | const uint64_t phihi = hiWord(a) * hiWord(b);\r | |
104 | // Sum terms that contribute to lo in a way that allows us to get the carry\r | |
105 | const uint64_t r0 = loWord(plolo);\r | |
106 | const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo);\r | |
107 | *lo = r0 + (r1 << 32);\r | |
108 | // Sum terms contributing to hi with the carry from lo\r | |
109 | *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi;\r | |
110 | }\r | |
111 | #undef loWord\r | |
112 | #undef hiWord\r | |
113 | \r | |
114 | COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);\r | |
115 | \r | |
116 | #elif defined QUAD_PRECISION\r | |
117 | #if __LDBL_MANT_DIG__ == 113\r | |
118 | #define CRT_LDBL_128BIT\r | |
119 | typedef __uint128_t rep_t;\r | |
120 | typedef __int128_t srep_t;\r | |
121 | typedef long double fp_t;\r | |
122 | #define REP_C (__uint128_t)\r | |
123 | // Note: Since there is no explicit way to tell compiler the constant is a\r | |
124 | // 128-bit integer, we let the constant be casted to 128-bit integer\r | |
125 | #define significandBits 112\r | |
126 | \r | |
127 | static inline int rep_clz(rep_t a) {\r | |
128 | const union\r | |
129 | {\r | |
130 | __uint128_t ll;\r | |
131 | #if _YUGA_BIG_ENDIAN\r | |
132 | struct { uint64_t high, low; } s;\r | |
133 | #else\r | |
134 | struct { uint64_t low, high; } s;\r | |
135 | #endif\r | |
136 | } uu = { .ll = a };\r | |
137 | \r | |
138 | uint64_t word;\r | |
139 | uint64_t add;\r | |
140 | \r | |
141 | if (uu.s.high){\r | |
142 | word = uu.s.high;\r | |
143 | add = 0;\r | |
144 | }\r | |
145 | else{\r | |
146 | word = uu.s.low;\r | |
147 | add = 64;\r | |
148 | }\r | |
149 | return __builtin_clzll(word) + add;\r | |
150 | }\r | |
151 | \r | |
152 | #define Word_LoMask UINT64_C(0x00000000ffffffff)\r | |
153 | #define Word_HiMask UINT64_C(0xffffffff00000000)\r | |
154 | #define Word_FullMask UINT64_C(0xffffffffffffffff)\r | |
155 | #define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)\r | |
156 | #define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)\r | |
157 | #define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)\r | |
158 | #define Word_4(a) (uint64_t)(a & Word_LoMask)\r | |
159 | \r | |
160 | // 128x128 -> 256 wide multiply for platforms that don't have such an operation;\r | |
161 | // many 64-bit platforms have this operation, but they tend to have hardware\r | |
162 | // floating-point, so we don't bother with a special case for them here.\r | |
163 | static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {\r | |
164 | \r | |
165 | const uint64_t product11 = Word_1(a) * Word_1(b);\r | |
166 | const uint64_t product12 = Word_1(a) * Word_2(b);\r | |
167 | const uint64_t product13 = Word_1(a) * Word_3(b);\r | |
168 | const uint64_t product14 = Word_1(a) * Word_4(b);\r | |
169 | const uint64_t product21 = Word_2(a) * Word_1(b);\r | |
170 | const uint64_t product22 = Word_2(a) * Word_2(b);\r | |
171 | const uint64_t product23 = Word_2(a) * Word_3(b);\r | |
172 | const uint64_t product24 = Word_2(a) * Word_4(b);\r | |
173 | const uint64_t product31 = Word_3(a) * Word_1(b);\r | |
174 | const uint64_t product32 = Word_3(a) * Word_2(b);\r | |
175 | const uint64_t product33 = Word_3(a) * Word_3(b);\r | |
176 | const uint64_t product34 = Word_3(a) * Word_4(b);\r | |
177 | const uint64_t product41 = Word_4(a) * Word_1(b);\r | |
178 | const uint64_t product42 = Word_4(a) * Word_2(b);\r | |
179 | const uint64_t product43 = Word_4(a) * Word_3(b);\r | |
180 | const uint64_t product44 = Word_4(a) * Word_4(b);\r | |
181 | \r | |
182 | const __uint128_t sum0 = (__uint128_t)product44;\r | |
183 | const __uint128_t sum1 = (__uint128_t)product34 +\r | |
184 | (__uint128_t)product43;\r | |
185 | const __uint128_t sum2 = (__uint128_t)product24 +\r | |
186 | (__uint128_t)product33 +\r | |
187 | (__uint128_t)product42;\r | |
188 | const __uint128_t sum3 = (__uint128_t)product14 +\r | |
189 | (__uint128_t)product23 +\r | |
190 | (__uint128_t)product32 +\r | |
191 | (__uint128_t)product41;\r | |
192 | const __uint128_t sum4 = (__uint128_t)product13 +\r | |
193 | (__uint128_t)product22 +\r | |
194 | (__uint128_t)product31;\r | |
195 | const __uint128_t sum5 = (__uint128_t)product12 +\r | |
196 | (__uint128_t)product21;\r | |
197 | const __uint128_t sum6 = (__uint128_t)product11;\r | |
198 | \r | |
199 | const __uint128_t r0 = (sum0 & Word_FullMask) +\r | |
200 | ((sum1 & Word_LoMask) << 32);\r | |
201 | const __uint128_t r1 = (sum0 >> 64) +\r | |
202 | ((sum1 >> 32) & Word_FullMask) +\r | |
203 | (sum2 & Word_FullMask) +\r | |
204 | ((sum3 << 32) & Word_HiMask);\r | |
205 | \r | |
206 | *lo = r0 + (r1 << 64);\r | |
207 | *hi = (r1 >> 64) +\r | |
208 | (sum1 >> 96) +\r | |
209 | (sum2 >> 64) +\r | |
210 | (sum3 >> 32) +\r | |
211 | sum4 +\r | |
212 | (sum5 << 32) +\r | |
213 | (sum6 << 64);\r | |
214 | }\r | |
215 | #undef Word_1\r | |
216 | #undef Word_2\r | |
217 | #undef Word_3\r | |
218 | #undef Word_4\r | |
219 | #undef Word_HiMask\r | |
220 | #undef Word_LoMask\r | |
221 | #undef Word_FullMask\r | |
222 | #endif // __LDBL_MANT_DIG__ == 113\r | |
223 | #else\r | |
224 | #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.\r | |
225 | #endif\r | |
226 | \r | |
227 | #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT)\r | |
228 | #define typeWidth (sizeof(rep_t)*CHAR_BIT)\r | |
229 | #define exponentBits (typeWidth - significandBits - 1)\r | |
230 | #define maxExponent ((1 << exponentBits) - 1)\r | |
231 | #define exponentBias (maxExponent >> 1)\r | |
232 | \r | |
233 | #define implicitBit (REP_C(1) << significandBits)\r | |
234 | #define significandMask (implicitBit - 1U)\r | |
235 | #define signBit (REP_C(1) << (significandBits + exponentBits))\r | |
236 | #define absMask (signBit - 1U)\r | |
237 | #define exponentMask (absMask ^ significandMask)\r | |
238 | #define oneRep ((rep_t)exponentBias << significandBits)\r | |
239 | #define infRep exponentMask\r | |
240 | #define quietBit (implicitBit >> 1)\r | |
241 | #define qnanRep (exponentMask | quietBit)\r | |
242 | \r | |
243 | static inline rep_t toRep(fp_t x) {\r | |
244 | const union { fp_t f; rep_t i; } rep = {.f = x};\r | |
245 | return rep.i;\r | |
246 | }\r | |
247 | \r | |
248 | static inline fp_t fromRep(rep_t x) {\r | |
249 | const union { fp_t f; rep_t i; } rep = {.i = x};\r | |
250 | return rep.f;\r | |
251 | }\r | |
252 | \r | |
253 | static inline int normalize(rep_t *significand) {\r | |
254 | const int shift = rep_clz(*significand) - rep_clz(implicitBit);\r | |
255 | *significand <<= shift;\r | |
256 | return 1 - shift;\r | |
257 | }\r | |
258 | \r | |
259 | static inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {\r | |
260 | *hi = *hi << count | *lo >> (typeWidth - count);\r | |
261 | *lo = *lo << count;\r | |
262 | }\r | |
263 | \r | |
264 | static inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) {\r | |
265 | if (count < typeWidth) {\r | |
266 | const bool sticky = *lo << (typeWidth - count);\r | |
267 | *lo = *hi << (typeWidth - count) | *lo >> count | sticky;\r | |
268 | *hi = *hi >> count;\r | |
269 | }\r | |
270 | else if (count < 2*typeWidth) {\r | |
271 | const bool sticky = *hi << (2*typeWidth - count) | *lo;\r | |
272 | *lo = *hi >> (count - typeWidth) | sticky;\r | |
273 | *hi = 0;\r | |
274 | } else {\r | |
275 | const bool sticky = *hi | *lo;\r | |
276 | *lo = sticky;\r | |
277 | *hi = 0;\r | |
278 | }\r | |
279 | }\r | |
280 | #endif\r | |
281 | \r | |
282 | #endif // FP_LIB_HEADER\r |