]>
Commit | Line | Data |
---|---|---|
1dbda2b4 AB |
1 | \r |
2 | /*\r | |
3 | ===============================================================================\r | |
4 | \r | |
5 | This C source fragment is part of the SoftFloat IEC/IEEE Floating-point\r | |
6 | Arithmetic Package, Release 2a.\r | |
7 | \r | |
8 | Written by John R. Hauser. This work was made possible in part by the\r | |
9 | International Computer Science Institute, located at Suite 600, 1947 Center\r | |
10 | Street, Berkeley, California 94704. Funding was partially provided by the\r | |
11 | National Science Foundation under grant MIP-9311980. The original version\r | |
12 | of this code was written as part of a project to build a fixed-point vector\r | |
13 | processor in collaboration with the University of California at Berkeley,\r | |
14 | overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r | |
15 | is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/\r | |
16 | arithmetic/SoftFloat.html'.\r | |
17 | \r | |
18 | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort\r | |
19 | has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT\r | |
20 | TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO\r | |
21 | PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY\r | |
22 | AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.\r | |
23 | \r | |
24 | Derivative works are acceptable, even for commercial purposes, so long as\r | |
25 | (1) they include prominent notice that the work is derivative, and (2) they\r | |
26 | include prominent notice akin to these four paragraphs for those parts of\r | |
27 | this code that are retained.\r | |
28 | \r | |
29 | ===============================================================================\r | |
30 | */\r | |
31 | \r | |
32 | /*\r | |
33 | -------------------------------------------------------------------------------\r | |
34 | Shifts `a' right by the number of bits given in `count'. If any nonzero\r | |
35 | bits are shifted off, they are ``jammed'' into the least significant bit of\r | |
36 | the result by setting the least significant bit to 1. The value of `count'\r | |
37 | can be arbitrarily large; in particular, if `count' is greater than 32, the\r | |
38 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.\r | |
39 | The result is stored in the location pointed to by `zPtr'.\r | |
40 | -------------------------------------------------------------------------------\r | |
41 | */\r | |
42 | INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )\r | |
43 | {\r | |
44 | bits32 z;\r | |
45 | \r | |
46 | if ( count == 0 ) {\r | |
47 | z = a;\r | |
48 | }\r | |
49 | else if ( count < 32 ) {\r | |
50 | z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );\r | |
51 | }\r | |
52 | else {\r | |
53 | z = ( a != 0 );\r | |
54 | }\r | |
55 | *zPtr = z;\r | |
56 | \r | |
57 | }\r | |
58 | \r | |
59 | /*\r | |
60 | -------------------------------------------------------------------------------\r | |
61 | Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\r | |
62 | number of bits given in `count'. Any bits shifted off are lost. The value\r | |
63 | of `count' can be arbitrarily large; in particular, if `count' is greater\r | |
64 | than 64, the result will be 0. The result is broken into two 32-bit pieces\r | |
65 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
66 | -------------------------------------------------------------------------------\r | |
67 | */\r | |
68 | INLINE void\r | |
69 | shift64Right(\r | |
70 | bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )\r | |
71 | {\r | |
72 | bits32 z0, z1;\r | |
73 | int8 negCount = ( - count ) & 31;\r | |
74 | \r | |
75 | if ( count == 0 ) {\r | |
76 | z1 = a1;\r | |
77 | z0 = a0;\r | |
78 | }\r | |
79 | else if ( count < 32 ) {\r | |
80 | z1 = ( a0<<negCount ) | ( a1>>count );\r | |
81 | z0 = a0>>count;\r | |
82 | }\r | |
83 | else {\r | |
84 | z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0;\r | |
85 | z0 = 0;\r | |
86 | }\r | |
87 | *z1Ptr = z1;\r | |
88 | *z0Ptr = z0;\r | |
89 | \r | |
90 | }\r | |
91 | \r | |
92 | /*\r | |
93 | -------------------------------------------------------------------------------\r | |
94 | Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the\r | |
95 | number of bits given in `count'. If any nonzero bits are shifted off, they\r | |
96 | are ``jammed'' into the least significant bit of the result by setting the\r | |
97 | least significant bit to 1. The value of `count' can be arbitrarily large;\r | |
98 | in particular, if `count' is greater than 64, the result will be either 0\r | |
99 | or 1, depending on whether the concatenation of `a0' and `a1' is zero or\r | |
100 | nonzero. The result is broken into two 32-bit pieces which are stored at\r | |
101 | the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
102 | -------------------------------------------------------------------------------\r | |
103 | */\r | |
104 | INLINE void\r | |
105 | shift64RightJamming(\r | |
106 | bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )\r | |
107 | {\r | |
108 | bits32 z0, z1;\r | |
109 | int8 negCount = ( - count ) & 31;\r | |
110 | \r | |
111 | if ( count == 0 ) {\r | |
112 | z1 = a1;\r | |
113 | z0 = a0;\r | |
114 | }\r | |
115 | else if ( count < 32 ) {\r | |
116 | z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );\r | |
117 | z0 = a0>>count;\r | |
118 | }\r | |
119 | else {\r | |
120 | if ( count == 32 ) {\r | |
121 | z1 = a0 | ( a1 != 0 );\r | |
122 | }\r | |
123 | else if ( count < 64 ) {\r | |
124 | z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );\r | |
125 | }\r | |
126 | else {\r | |
127 | z1 = ( ( a0 | a1 ) != 0 );\r | |
128 | }\r | |
129 | z0 = 0;\r | |
130 | }\r | |
131 | *z1Ptr = z1;\r | |
132 | *z0Ptr = z0;\r | |
133 | \r | |
134 | }\r | |
135 | \r | |
136 | /*\r | |
137 | -------------------------------------------------------------------------------\r | |
138 | Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right\r | |
139 | by 32 _plus_ the number of bits given in `count'. The shifted result is\r | |
140 | at most 64 nonzero bits; these are broken into two 32-bit pieces which are\r | |
141 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted\r | |
142 | off form a third 32-bit result as follows: The _last_ bit shifted off is\r | |
143 | the most-significant bit of the extra result, and the other 31 bits of the\r | |
144 | extra result are all zero if and only if _all_but_the_last_ bits shifted off\r | |
145 | were all zero. This extra result is stored in the location pointed to by\r | |
146 | `z2Ptr'. The value of `count' can be arbitrarily large.\r | |
147 | (This routine makes more sense if `a0', `a1', and `a2' are considered\r | |
148 | to form a fixed-point value with binary point between `a1' and `a2'. This\r | |
149 | fixed-point value is shifted right by the number of bits given in `count',\r | |
150 | and the integer part of the result is returned at the locations pointed to\r | |
151 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly\r | |
152 | corrupted as described above, and is returned at the location pointed to by\r | |
153 | `z2Ptr'.)\r | |
154 | -------------------------------------------------------------------------------\r | |
155 | */\r | |
156 | INLINE void\r | |
157 | shift64ExtraRightJamming(\r | |
158 | bits32 a0,\r | |
159 | bits32 a1,\r | |
160 | bits32 a2,\r | |
161 | int16 count,\r | |
162 | bits32 *z0Ptr,\r | |
163 | bits32 *z1Ptr,\r | |
164 | bits32 *z2Ptr\r | |
165 | )\r | |
166 | {\r | |
167 | bits32 z0, z1, z2;\r | |
168 | int8 negCount = ( - count ) & 31;\r | |
169 | \r | |
170 | if ( count == 0 ) {\r | |
171 | z2 = a2;\r | |
172 | z1 = a1;\r | |
173 | z0 = a0;\r | |
174 | }\r | |
175 | else {\r | |
176 | if ( count < 32 ) {\r | |
177 | z2 = a1<<negCount;\r | |
178 | z1 = ( a0<<negCount ) | ( a1>>count );\r | |
179 | z0 = a0>>count;\r | |
180 | }\r | |
181 | else {\r | |
182 | if ( count == 32 ) {\r | |
183 | z2 = a1;\r | |
184 | z1 = a0;\r | |
185 | }\r | |
186 | else {\r | |
187 | a2 |= a1;\r | |
188 | if ( count < 64 ) {\r | |
189 | z2 = a0<<negCount;\r | |
190 | z1 = a0>>( count & 31 );\r | |
191 | }\r | |
192 | else {\r | |
193 | z2 = ( count == 64 ) ? a0 : ( a0 != 0 );\r | |
194 | z1 = 0;\r | |
195 | }\r | |
196 | }\r | |
197 | z0 = 0;\r | |
198 | }\r | |
199 | z2 |= ( a2 != 0 );\r | |
200 | }\r | |
201 | *z2Ptr = z2;\r | |
202 | *z1Ptr = z1;\r | |
203 | *z0Ptr = z0;\r | |
204 | \r | |
205 | }\r | |
206 | \r | |
207 | /*\r | |
208 | -------------------------------------------------------------------------------\r | |
209 | Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the\r | |
210 | number of bits given in `count'. Any bits shifted off are lost. The value\r | |
211 | of `count' must be less than 32. The result is broken into two 32-bit\r | |
212 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
213 | -------------------------------------------------------------------------------\r | |
214 | */\r | |
215 | INLINE void\r | |
216 | shortShift64Left(\r | |
217 | bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr )\r | |
218 | {\r | |
219 | \r | |
220 | *z1Ptr = a1<<count;\r | |
221 | *z0Ptr =\r | |
222 | ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );\r | |
223 | \r | |
224 | }\r | |
225 | \r | |
226 | /*\r | |
227 | -------------------------------------------------------------------------------\r | |
228 | Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left\r | |
229 | by the number of bits given in `count'. Any bits shifted off are lost.\r | |
230 | The value of `count' must be less than 32. The result is broken into three\r | |
231 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',\r | |
232 | `z1Ptr', and `z2Ptr'.\r | |
233 | -------------------------------------------------------------------------------\r | |
234 | */\r | |
235 | INLINE void\r | |
236 | shortShift96Left(\r | |
237 | bits32 a0,\r | |
238 | bits32 a1,\r | |
239 | bits32 a2,\r | |
240 | int16 count,\r | |
241 | bits32 *z0Ptr,\r | |
242 | bits32 *z1Ptr,\r | |
243 | bits32 *z2Ptr\r | |
244 | )\r | |
245 | {\r | |
246 | bits32 z0, z1, z2;\r | |
247 | int8 negCount;\r | |
248 | \r | |
249 | z2 = a2<<count;\r | |
250 | z1 = a1<<count;\r | |
251 | z0 = a0<<count;\r | |
252 | if ( 0 < count ) {\r | |
253 | negCount = ( ( - count ) & 31 );\r | |
254 | z1 |= a2>>negCount;\r | |
255 | z0 |= a1>>negCount;\r | |
256 | }\r | |
257 | *z2Ptr = z2;\r | |
258 | *z1Ptr = z1;\r | |
259 | *z0Ptr = z0;\r | |
260 | \r | |
261 | }\r | |
262 | \r | |
263 | /*\r | |
264 | -------------------------------------------------------------------------------\r | |
265 | Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit\r | |
266 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so\r | |
267 | any carry out is lost. The result is broken into two 32-bit pieces which\r | |
268 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
269 | -------------------------------------------------------------------------------\r | |
270 | */\r | |
271 | INLINE void\r | |
272 | add64(\r | |
273 | bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )\r | |
274 | {\r | |
275 | bits32 z1;\r | |
276 | \r | |
277 | z1 = a1 + b1;\r | |
278 | *z1Ptr = z1;\r | |
279 | *z0Ptr = a0 + b0 + ( z1 < a1 );\r | |
280 | \r | |
281 | }\r | |
282 | \r | |
283 | /*\r | |
284 | -------------------------------------------------------------------------------\r | |
285 | Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the\r | |
286 | 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is\r | |
287 | modulo 2^96, so any carry out is lost. The result is broken into three\r | |
288 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr',\r | |
289 | `z1Ptr', and `z2Ptr'.\r | |
290 | -------------------------------------------------------------------------------\r | |
291 | */\r | |
292 | INLINE void\r | |
293 | add96(\r | |
294 | bits32 a0,\r | |
295 | bits32 a1,\r | |
296 | bits32 a2,\r | |
297 | bits32 b0,\r | |
298 | bits32 b1,\r | |
299 | bits32 b2,\r | |
300 | bits32 *z0Ptr,\r | |
301 | bits32 *z1Ptr,\r | |
302 | bits32 *z2Ptr\r | |
303 | )\r | |
304 | {\r | |
305 | bits32 z0, z1, z2;\r | |
306 | int8 carry0, carry1;\r | |
307 | \r | |
308 | z2 = a2 + b2;\r | |
309 | carry1 = ( z2 < a2 );\r | |
310 | z1 = a1 + b1;\r | |
311 | carry0 = ( z1 < a1 );\r | |
312 | z0 = a0 + b0;\r | |
313 | z1 += carry1;\r | |
314 | z0 += ( z1 < (bits32)carry1 );\r | |
315 | z0 += carry0;\r | |
316 | *z2Ptr = z2;\r | |
317 | *z1Ptr = z1;\r | |
318 | *z0Ptr = z0;\r | |
319 | \r | |
320 | }\r | |
321 | \r | |
322 | /*\r | |
323 | -------------------------------------------------------------------------------\r | |
324 | Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the\r | |
325 | 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo\r | |
326 | 2^64, so any borrow out (carry out) is lost. The result is broken into two\r | |
327 | 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and\r | |
328 | `z1Ptr'.\r | |
329 | -------------------------------------------------------------------------------\r | |
330 | */\r | |
331 | INLINE void\r | |
332 | sub64(\r | |
333 | bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr )\r | |
334 | {\r | |
335 | \r | |
336 | *z1Ptr = a1 - b1;\r | |
337 | *z0Ptr = a0 - b0 - ( a1 < b1 );\r | |
338 | \r | |
339 | }\r | |
340 | \r | |
341 | /*\r | |
342 | -------------------------------------------------------------------------------\r | |
343 | Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from\r | |
344 | the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction\r | |
345 | is modulo 2^96, so any borrow out (carry out) is lost. The result is broken\r | |
346 | into three 32-bit pieces which are stored at the locations pointed to by\r | |
347 | `z0Ptr', `z1Ptr', and `z2Ptr'.\r | |
348 | -------------------------------------------------------------------------------\r | |
349 | */\r | |
350 | INLINE void\r | |
351 | sub96(\r | |
352 | bits32 a0,\r | |
353 | bits32 a1,\r | |
354 | bits32 a2,\r | |
355 | bits32 b0,\r | |
356 | bits32 b1,\r | |
357 | bits32 b2,\r | |
358 | bits32 *z0Ptr,\r | |
359 | bits32 *z1Ptr,\r | |
360 | bits32 *z2Ptr\r | |
361 | )\r | |
362 | {\r | |
363 | bits32 z0, z1, z2;\r | |
364 | int8 borrow0, borrow1;\r | |
365 | \r | |
366 | z2 = a2 - b2;\r | |
367 | borrow1 = ( a2 < b2 );\r | |
368 | z1 = a1 - b1;\r | |
369 | borrow0 = ( a1 < b1 );\r | |
370 | z0 = a0 - b0;\r | |
371 | z0 -= ( z1 < (bits32)borrow1 );\r | |
372 | z1 -= borrow1;\r | |
373 | z0 -= borrow0;\r | |
374 | *z2Ptr = z2;\r | |
375 | *z1Ptr = z1;\r | |
376 | *z0Ptr = z0;\r | |
377 | \r | |
378 | }\r | |
379 | \r | |
380 | /*\r | |
381 | -------------------------------------------------------------------------------\r | |
382 | Multiplies `a' by `b' to obtain a 64-bit product. The product is broken\r | |
383 | into two 32-bit pieces which are stored at the locations pointed to by\r | |
384 | `z0Ptr' and `z1Ptr'.\r | |
385 | -------------------------------------------------------------------------------\r | |
386 | */\r | |
387 | INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr )\r | |
388 | {\r | |
389 | bits16 aHigh, aLow, bHigh, bLow;\r | |
390 | bits32 z0, zMiddleA, zMiddleB, z1;\r | |
391 | \r | |
392 | aLow = a;\r | |
393 | aHigh = a>>16;\r | |
394 | bLow = b;\r | |
395 | bHigh = b>>16;\r | |
396 | z1 = ( (bits32) aLow ) * bLow;\r | |
397 | zMiddleA = ( (bits32) aLow ) * bHigh;\r | |
398 | zMiddleB = ( (bits32) aHigh ) * bLow;\r | |
399 | z0 = ( (bits32) aHigh ) * bHigh;\r | |
400 | zMiddleA += zMiddleB;\r | |
401 | z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );\r | |
402 | zMiddleA <<= 16;\r | |
403 | z1 += zMiddleA;\r | |
404 | z0 += ( z1 < zMiddleA );\r | |
405 | *z1Ptr = z1;\r | |
406 | *z0Ptr = z0;\r | |
407 | \r | |
408 | }\r | |
409 | \r | |
410 | /*\r | |
411 | -------------------------------------------------------------------------------\r | |
412 | Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'\r | |
413 | to obtain a 96-bit product. The product is broken into three 32-bit pieces\r | |
414 | which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and\r | |
415 | `z2Ptr'.\r | |
416 | -------------------------------------------------------------------------------\r | |
417 | */\r | |
418 | INLINE void\r | |
419 | mul64By32To96(\r | |
420 | bits32 a0,\r | |
421 | bits32 a1,\r | |
422 | bits32 b,\r | |
423 | bits32 *z0Ptr,\r | |
424 | bits32 *z1Ptr,\r | |
425 | bits32 *z2Ptr\r | |
426 | )\r | |
427 | {\r | |
428 | bits32 z0, z1, z2, more1;\r | |
429 | \r | |
430 | mul32To64( a1, b, &z1, &z2 );\r | |
431 | mul32To64( a0, b, &z0, &more1 );\r | |
432 | add64( z0, more1, 0, z1, &z0, &z1 );\r | |
433 | *z2Ptr = z2;\r | |
434 | *z1Ptr = z1;\r | |
435 | *z0Ptr = z0;\r | |
436 | \r | |
437 | }\r | |
438 | \r | |
439 | /*\r | |
440 | -------------------------------------------------------------------------------\r | |
441 | Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the\r | |
442 | 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit\r | |
443 | product. The product is broken into four 32-bit pieces which are stored at\r | |
444 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.\r | |
445 | -------------------------------------------------------------------------------\r | |
446 | */\r | |
447 | INLINE void\r | |
448 | mul64To128(\r | |
449 | bits32 a0,\r | |
450 | bits32 a1,\r | |
451 | bits32 b0,\r | |
452 | bits32 b1,\r | |
453 | bits32 *z0Ptr,\r | |
454 | bits32 *z1Ptr,\r | |
455 | bits32 *z2Ptr,\r | |
456 | bits32 *z3Ptr\r | |
457 | )\r | |
458 | {\r | |
459 | bits32 z0, z1, z2, z3;\r | |
460 | bits32 more1, more2;\r | |
461 | \r | |
462 | mul32To64( a1, b1, &z2, &z3 );\r | |
463 | mul32To64( a1, b0, &z1, &more2 );\r | |
464 | add64( z1, more2, 0, z2, &z1, &z2 );\r | |
465 | mul32To64( a0, b0, &z0, &more1 );\r | |
466 | add64( z0, more1, 0, z1, &z0, &z1 );\r | |
467 | mul32To64( a0, b1, &more1, &more2 );\r | |
468 | add64( more1, more2, 0, z2, &more1, &z2 );\r | |
469 | add64( z0, z1, 0, more1, &z0, &z1 );\r | |
470 | *z3Ptr = z3;\r | |
471 | *z2Ptr = z2;\r | |
472 | *z1Ptr = z1;\r | |
473 | *z0Ptr = z0;\r | |
474 | \r | |
475 | }\r | |
476 | \r | |
477 | /*\r | |
478 | -------------------------------------------------------------------------------\r | |
479 | Returns an approximation to the 32-bit integer quotient obtained by dividing\r | |
480 | `b' into the 64-bit value formed by concatenating `a0' and `a1'. The\r | |
481 | divisor `b' must be at least 2^31. If q is the exact quotient truncated\r | |
482 | toward zero, the approximation returned lies between q and q + 2 inclusive.\r | |
483 | If the exact quotient q is larger than 32 bits, the maximum positive 32-bit\r | |
484 | unsigned integer is returned.\r | |
485 | -------------------------------------------------------------------------------\r | |
486 | */\r | |
487 | static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b )\r | |
488 | {\r | |
489 | bits32 b0, b1;\r | |
490 | bits32 rem0, rem1, term0, term1;\r | |
491 | bits32 z;\r | |
492 | \r | |
493 | if ( b <= a0 ) return 0xFFFFFFFF;\r | |
494 | b0 = b>>16;\r | |
495 | z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16;\r | |
496 | mul32To64( b, z, &term0, &term1 );\r | |
497 | sub64( a0, a1, term0, term1, &rem0, &rem1 );\r | |
498 | while ( ( (sbits32) rem0 ) < 0 ) {\r | |
499 | z -= 0x10000;\r | |
500 | b1 = b<<16;\r | |
501 | add64( rem0, rem1, b0, b1, &rem0, &rem1 );\r | |
502 | }\r | |
503 | rem0 = ( rem0<<16 ) | ( rem1>>16 );\r | |
504 | z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0;\r | |
505 | return z;\r | |
506 | \r | |
507 | }\r | |
508 | \r | |
509 | #ifndef SOFTFLOAT_FOR_GCC\r | |
510 | /*\r | |
511 | -------------------------------------------------------------------------------\r | |
512 | Returns an approximation to the square root of the 32-bit significand given\r | |
513 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of\r | |
514 | `aExp' (the least significant bit) is 1, the integer returned approximates\r | |
515 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'\r | |
516 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either\r | |
517 | case, the approximation returned lies strictly within +/-2 of the exact\r | |
518 | value.\r | |
519 | -------------------------------------------------------------------------------\r | |
520 | */\r | |
521 | static bits32 estimateSqrt32( int16 aExp, bits32 a )\r | |
522 | {\r | |
523 | static const bits16 sqrtOddAdjustments[] = {\r | |
524 | 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,\r | |
525 | 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67\r | |
526 | };\r | |
527 | static const bits16 sqrtEvenAdjustments[] = {\r | |
528 | 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,\r | |
529 | 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002\r | |
530 | };\r | |
531 | int8 index;\r | |
532 | bits32 z;\r | |
533 | \r | |
534 | index = ( a>>27 ) & 15;\r | |
535 | if ( aExp & 1 ) {\r | |
536 | z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];\r | |
537 | z = ( ( a / z )<<14 ) + ( z<<15 );\r | |
538 | a >>= 1;\r | |
539 | }\r | |
540 | else {\r | |
541 | z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];\r | |
542 | z = a / z + z;\r | |
543 | z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );\r | |
544 | if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );\r | |
545 | }\r | |
546 | return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 );\r | |
547 | \r | |
548 | }\r | |
549 | #endif\r | |
550 | \r | |
551 | /*\r | |
552 | -------------------------------------------------------------------------------\r | |
553 | Returns the number of leading 0 bits before the most-significant 1 bit of\r | |
554 | `a'. If `a' is zero, 32 is returned.\r | |
555 | -------------------------------------------------------------------------------\r | |
556 | */\r | |
557 | static int8 countLeadingZeros32( bits32 a )\r | |
558 | {\r | |
559 | static const int8 countLeadingZerosHigh[] = {\r | |
560 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,\r | |
561 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\r | |
562 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
563 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
564 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
565 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
566 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
567 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
568 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
569 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
570 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
571 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
572 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
573 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
574 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
575 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r | |
576 | };\r | |
577 | int8 shiftCount;\r | |
578 | \r | |
579 | shiftCount = 0;\r | |
580 | if ( a < 0x10000 ) {\r | |
581 | shiftCount += 16;\r | |
582 | a <<= 16;\r | |
583 | }\r | |
584 | if ( a < 0x1000000 ) {\r | |
585 | shiftCount += 8;\r | |
586 | a <<= 8;\r | |
587 | }\r | |
588 | shiftCount += countLeadingZerosHigh[ a>>24 ];\r | |
589 | return shiftCount;\r | |
590 | \r | |
591 | }\r | |
592 | \r | |
593 | /*\r | |
594 | -------------------------------------------------------------------------------\r | |
595 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is\r | |
596 | equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,\r | |
597 | returns 0.\r | |
598 | -------------------------------------------------------------------------------\r | |
599 | */\r | |
600 | INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )\r | |
601 | {\r | |
602 | \r | |
603 | return ( a0 == b0 ) && ( a1 == b1 );\r | |
604 | \r | |
605 | }\r | |
606 | \r | |
607 | /*\r | |
608 | -------------------------------------------------------------------------------\r | |
609 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less\r | |
610 | than or equal to the 64-bit value formed by concatenating `b0' and `b1'.\r | |
611 | Otherwise, returns 0.\r | |
612 | -------------------------------------------------------------------------------\r | |
613 | */\r | |
614 | INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )\r | |
615 | {\r | |
616 | \r | |
617 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );\r | |
618 | \r | |
619 | }\r | |
620 | \r | |
621 | /*\r | |
622 | -------------------------------------------------------------------------------\r | |
623 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less\r | |
624 | than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,\r | |
625 | returns 0.\r | |
626 | -------------------------------------------------------------------------------\r | |
627 | */\r | |
628 | INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )\r | |
629 | {\r | |
630 | \r | |
631 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );\r | |
632 | \r | |
633 | }\r | |
634 | \r | |
635 | /*\r | |
636 | -------------------------------------------------------------------------------\r | |
637 | Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not\r | |
638 | equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,\r | |
639 | returns 0.\r | |
640 | -------------------------------------------------------------------------------\r | |
641 | */\r | |
642 | INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 )\r | |
643 | {\r | |
644 | \r | |
645 | return ( a0 != b0 ) || ( a1 != b1 );\r | |
646 | \r | |
647 | }\r | |
648 | \r |