]>
Commit | Line | Data |
---|---|---|
3352b62b N |
1 | /* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */\r |
2 | \r | |
3 | /*\r | |
4 | ===============================================================================\r | |
5 | \r | |
6 | This C source fragment is part of the SoftFloat IEC/IEEE Floating-point\r | |
7 | Arithmetic Package, Release 2a.\r | |
8 | \r | |
9 | Written by John R. Hauser. This work was made possible in part by the\r | |
10 | International Computer Science Institute, located at Suite 600, 1947 Center\r | |
11 | Street, Berkeley, California 94704. Funding was partially provided by the\r | |
12 | National Science Foundation under grant MIP-9311980. The original version\r | |
13 | of this code was written as part of a project to build a fixed-point vector\r | |
14 | processor in collaboration with the University of California at Berkeley,\r | |
15 | overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r | |
16 | is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/\r | |
17 | arithmetic/SoftFloat.html'.\r | |
18 | \r | |
19 | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort\r | |
20 | has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT\r | |
21 | TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO\r | |
22 | PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY\r | |
23 | AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.\r | |
24 | \r | |
25 | Derivative works are acceptable, even for commercial purposes, so long as\r | |
26 | (1) they include prominent notice that the work is derivative, and (2) they\r | |
27 | include prominent notice akin to these four paragraphs for those parts of\r | |
28 | this code that are retained.\r | |
29 | \r | |
30 | ===============================================================================\r | |
31 | */\r | |
32 | \r | |
33 | /*\r | |
34 | -------------------------------------------------------------------------------\r | |
35 | Shifts `a' right by the number of bits given in `count'. If any nonzero\r | |
36 | bits are shifted off, they are ``jammed'' into the least significant bit of\r | |
37 | the result by setting the least significant bit to 1. The value of `count'\r | |
38 | can be arbitrarily large; in particular, if `count' is greater than 32, the\r | |
39 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.\r | |
40 | The result is stored in the location pointed to by `zPtr'.\r | |
41 | -------------------------------------------------------------------------------\r | |
42 | */\r | |
43 | INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )\r | |
44 | {\r | |
45 | bits32 z;\r | |
46 | \r | |
47 | if ( count == 0 ) {\r | |
48 | z = a;\r | |
49 | }\r | |
50 | else if ( count < 32 ) {\r | |
51 | z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );\r | |
52 | }\r | |
53 | else {\r | |
54 | z = ( a != 0 );\r | |
55 | }\r | |
56 | *zPtr = z;\r | |
57 | \r | |
58 | }\r | |
59 | \r | |
60 | /*\r | |
61 | -------------------------------------------------------------------------------\r | |
62 | Shifts `a' right by the number of bits given in `count'. If any nonzero\r | |
63 | bits are shifted off, they are ``jammed'' into the least significant bit of\r | |
64 | the result by setting the least significant bit to 1. The value of `count'\r | |
65 | can be arbitrarily large; in particular, if `count' is greater than 64, the\r | |
66 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.\r | |
67 | The result is stored in the location pointed to by `zPtr'.\r | |
68 | -------------------------------------------------------------------------------\r | |
69 | */\r | |
70 | INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )\r | |
71 | {\r | |
72 | bits64 z;\r | |
73 | \r | |
74 | if ( count == 0 ) {\r | |
75 | z = a;\r | |
76 | }\r | |
77 | else if ( count < 64 ) {\r | |
78 | z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );\r | |
79 | }\r | |
80 | else {\r | |
81 | z = ( a != 0 );\r | |
82 | }\r | |
83 | *zPtr = z;\r | |
84 | \r | |
85 | }\r | |
86 | \r | |
87 | /*\r | |
88 | -------------------------------------------------------------------------------\r | |
89 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64\r | |
90 | _plus_ the number of bits given in `count'. The shifted result is at most\r | |
91 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The\r | |
92 | bits shifted off form a second 64-bit result as follows: The _last_ bit\r | |
93 | shifted off is the most-significant bit of the extra result, and the other\r | |
94 | 63 bits of the extra result are all zero if and only if _all_but_the_last_\r | |
95 | bits shifted off were all zero. This extra result is stored in the location\r | |
96 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.\r | |
97 | (This routine makes more sense if `a0' and `a1' are considered to form a\r | |
98 | fixed-point value with binary point between `a0' and `a1'. This fixed-point\r | |
99 | value is shifted right by the number of bits given in `count', and the\r | |
100 | integer part of the result is returned at the location pointed to by\r | |
101 | `z0Ptr'. The fractional part of the result may be slightly corrupted as\r | |
102 | described above, and is returned at the location pointed to by `z1Ptr'.)\r | |
103 | -------------------------------------------------------------------------------\r | |
104 | */\r | |
105 | INLINE void\r | |
106 | shift64ExtraRightJamming(\r | |
107 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
108 | {\r | |
109 | bits64 z0, z1;\r | |
110 | int8 negCount = ( - count ) & 63;\r | |
111 | \r | |
112 | if ( count == 0 ) {\r | |
113 | z1 = a1;\r | |
114 | z0 = a0;\r | |
115 | }\r | |
116 | else if ( count < 64 ) {\r | |
117 | z1 = ( a0<<negCount ) | ( a1 != 0 );\r | |
118 | z0 = a0>>count;\r | |
119 | }\r | |
120 | else {\r | |
121 | if ( count == 64 ) {\r | |
122 | z1 = a0 | ( a1 != 0 );\r | |
123 | }\r | |
124 | else {\r | |
125 | z1 = ( ( a0 | a1 ) != 0 );\r | |
126 | }\r | |
127 | z0 = 0;\r | |
128 | }\r | |
129 | *z1Ptr = z1;\r | |
130 | *z0Ptr = z0;\r | |
131 | \r | |
132 | }\r | |
133 | \r | |
134 | /*\r | |
135 | -------------------------------------------------------------------------------\r | |
136 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the\r | |
137 | number of bits given in `count'. Any bits shifted off are lost. The value\r | |
138 | of `count' can be arbitrarily large; in particular, if `count' is greater\r | |
139 | than 128, the result will be 0. The result is broken into two 64-bit pieces\r | |
140 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
141 | -------------------------------------------------------------------------------\r | |
142 | */\r | |
143 | INLINE void\r | |
144 | shift128Right(\r | |
145 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
146 | {\r | |
147 | bits64 z0, z1;\r | |
148 | int8 negCount = ( - count ) & 63;\r | |
149 | \r | |
150 | if ( count == 0 ) {\r | |
151 | z1 = a1;\r | |
152 | z0 = a0;\r | |
153 | }\r | |
154 | else if ( count < 64 ) {\r | |
155 | z1 = ( a0<<negCount ) | ( a1>>count );\r | |
156 | z0 = a0>>count;\r | |
157 | }\r | |
158 | else {\r | |
159 | z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;\r | |
160 | z0 = 0;\r | |
161 | }\r | |
162 | *z1Ptr = z1;\r | |
163 | *z0Ptr = z0;\r | |
164 | \r | |
165 | }\r | |
166 | \r | |
167 | /*\r | |
168 | -------------------------------------------------------------------------------\r | |
169 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the\r | |
170 | number of bits given in `count'. If any nonzero bits are shifted off, they\r | |
171 | are ``jammed'' into the least significant bit of the result by setting the\r | |
172 | least significant bit to 1. The value of `count' can be arbitrarily large;\r | |
173 | in particular, if `count' is greater than 128, the result will be either\r | |
174 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or\r | |
175 | nonzero. The result is broken into two 64-bit pieces which are stored at\r | |
176 | the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
177 | -------------------------------------------------------------------------------\r | |
178 | */\r | |
179 | INLINE void\r | |
180 | shift128RightJamming(\r | |
181 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
182 | {\r | |
183 | bits64 z0, z1;\r | |
184 | int8 negCount = ( - count ) & 63;\r | |
185 | \r | |
186 | if ( count == 0 ) {\r | |
187 | z1 = a1;\r | |
188 | z0 = a0;\r | |
189 | }\r | |
190 | else if ( count < 64 ) {\r | |
191 | z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );\r | |
192 | z0 = a0>>count;\r | |
193 | }\r | |
194 | else {\r | |
195 | if ( count == 64 ) {\r | |
196 | z1 = a0 | ( a1 != 0 );\r | |
197 | }\r | |
198 | else if ( count < 128 ) {\r | |
199 | z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );\r | |
200 | }\r | |
201 | else {\r | |
202 | z1 = ( ( a0 | a1 ) != 0 );\r | |
203 | }\r | |
204 | z0 = 0;\r | |
205 | }\r | |
206 | *z1Ptr = z1;\r | |
207 | *z0Ptr = z0;\r | |
208 | \r | |
209 | }\r | |
210 | \r | |
211 | /*\r | |
212 | -------------------------------------------------------------------------------\r | |
213 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right\r | |
214 | by 64 _plus_ the number of bits given in `count'. The shifted result is\r | |
215 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are\r | |
216 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted\r | |
217 | off form a third 64-bit result as follows: The _last_ bit shifted off is\r | |
218 | the most-significant bit of the extra result, and the other 63 bits of the\r | |
219 | extra result are all zero if and only if _all_but_the_last_ bits shifted off\r | |
220 | were all zero. This extra result is stored in the location pointed to by\r | |
221 | `z2Ptr'. The value of `count' can be arbitrarily large.\r | |
222 | (This routine makes more sense if `a0', `a1', and `a2' are considered\r | |
223 | to form a fixed-point value with binary point between `a1' and `a2'. This\r | |
224 | fixed-point value is shifted right by the number of bits given in `count',\r | |
225 | and the integer part of the result is returned at the locations pointed to\r | |
226 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly\r | |
227 | corrupted as described above, and is returned at the location pointed to by\r | |
228 | `z2Ptr'.)\r | |
229 | -------------------------------------------------------------------------------\r | |
230 | */\r | |
231 | INLINE void\r | |
232 | shift128ExtraRightJamming(\r | |
233 | bits64 a0,\r | |
234 | bits64 a1,\r | |
235 | bits64 a2,\r | |
236 | int16 count,\r | |
237 | bits64 *z0Ptr,\r | |
238 | bits64 *z1Ptr,\r | |
239 | bits64 *z2Ptr\r | |
240 | )\r | |
241 | {\r | |
242 | bits64 z0, z1, z2;\r | |
243 | int8 negCount = ( - count ) & 63;\r | |
244 | \r | |
245 | if ( count == 0 ) {\r | |
246 | z2 = a2;\r | |
247 | z1 = a1;\r | |
248 | z0 = a0;\r | |
249 | }\r | |
250 | else {\r | |
251 | if ( count < 64 ) {\r | |
252 | z2 = a1<<negCount;\r | |
253 | z1 = ( a0<<negCount ) | ( a1>>count );\r | |
254 | z0 = a0>>count;\r | |
255 | }\r | |
256 | else {\r | |
257 | if ( count == 64 ) {\r | |
258 | z2 = a1;\r | |
259 | z1 = a0;\r | |
260 | }\r | |
261 | else {\r | |
262 | a2 |= a1;\r | |
263 | if ( count < 128 ) {\r | |
264 | z2 = a0<<negCount;\r | |
265 | z1 = a0>>( count & 63 );\r | |
266 | }\r | |
267 | else {\r | |
268 | z2 = ( count == 128 ) ? a0 : ( a0 != 0 );\r | |
269 | z1 = 0;\r | |
270 | }\r | |
271 | }\r | |
272 | z0 = 0;\r | |
273 | }\r | |
274 | z2 |= ( a2 != 0 );\r | |
275 | }\r | |
276 | *z2Ptr = z2;\r | |
277 | *z1Ptr = z1;\r | |
278 | *z0Ptr = z0;\r | |
279 | \r | |
280 | }\r | |
281 | \r | |
282 | /*\r | |
283 | -------------------------------------------------------------------------------\r | |
284 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the\r | |
285 | number of bits given in `count'. Any bits shifted off are lost. The value\r | |
286 | of `count' must be less than 64. The result is broken into two 64-bit\r | |
287 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
288 | -------------------------------------------------------------------------------\r | |
289 | */\r | |
290 | INLINE void\r | |
291 | shortShift128Left(\r | |
292 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
293 | {\r | |
294 | \r | |
295 | *z1Ptr = a1<<count;\r | |
296 | *z0Ptr =\r | |
297 | ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );\r | |
298 | \r | |
299 | }\r | |
300 | \r | |
301 | /*\r | |
302 | -------------------------------------------------------------------------------\r | |
303 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left\r | |
304 | by the number of bits given in `count'. Any bits shifted off are lost.\r | |
305 | The value of `count' must be less than 64. The result is broken into three\r | |
306 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',\r | |
307 | `z1Ptr', and `z2Ptr'.\r | |
308 | -------------------------------------------------------------------------------\r | |
309 | */\r | |
310 | INLINE void\r | |
311 | shortShift192Left(\r | |
312 | bits64 a0,\r | |
313 | bits64 a1,\r | |
314 | bits64 a2,\r | |
315 | int16 count,\r | |
316 | bits64 *z0Ptr,\r | |
317 | bits64 *z1Ptr,\r | |
318 | bits64 *z2Ptr\r | |
319 | )\r | |
320 | {\r | |
321 | bits64 z0, z1, z2;\r | |
322 | int8 negCount;\r | |
323 | \r | |
324 | z2 = a2<<count;\r | |
325 | z1 = a1<<count;\r | |
326 | z0 = a0<<count;\r | |
327 | if ( 0 < count ) {\r | |
328 | negCount = ( ( - count ) & 63 );\r | |
329 | z1 |= a2>>negCount;\r | |
330 | z0 |= a1>>negCount;\r | |
331 | }\r | |
332 | *z2Ptr = z2;\r | |
333 | *z1Ptr = z1;\r | |
334 | *z0Ptr = z0;\r | |
335 | \r | |
336 | }\r | |
337 | \r | |
338 | /*\r | |
339 | -------------------------------------------------------------------------------\r | |
340 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit\r | |
341 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so\r | |
342 | any carry out is lost. The result is broken into two 64-bit pieces which\r | |
343 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.\r | |
344 | -------------------------------------------------------------------------------\r | |
345 | */\r | |
346 | INLINE void\r | |
347 | add128(\r | |
348 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
349 | {\r | |
350 | bits64 z1;\r | |
351 | \r | |
352 | z1 = a1 + b1;\r | |
353 | *z1Ptr = z1;\r | |
354 | *z0Ptr = a0 + b0 + ( z1 < a1 );\r | |
355 | \r | |
356 | }\r | |
357 | \r | |
358 | /*\r | |
359 | -------------------------------------------------------------------------------\r | |
360 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the\r | |
361 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is\r | |
362 | modulo 2^192, so any carry out is lost. The result is broken into three\r | |
363 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',\r | |
364 | `z1Ptr', and `z2Ptr'.\r | |
365 | -------------------------------------------------------------------------------\r | |
366 | */\r | |
367 | INLINE void\r | |
368 | add192(\r | |
369 | bits64 a0,\r | |
370 | bits64 a1,\r | |
371 | bits64 a2,\r | |
372 | bits64 b0,\r | |
373 | bits64 b1,\r | |
374 | bits64 b2,\r | |
375 | bits64 *z0Ptr,\r | |
376 | bits64 *z1Ptr,\r | |
377 | bits64 *z2Ptr\r | |
378 | )\r | |
379 | {\r | |
380 | bits64 z0, z1, z2;\r | |
381 | int8 carry0, carry1;\r | |
382 | \r | |
383 | z2 = a2 + b2;\r | |
384 | carry1 = ( z2 < a2 );\r | |
385 | z1 = a1 + b1;\r | |
386 | carry0 = ( z1 < a1 );\r | |
387 | z0 = a0 + b0;\r | |
388 | z1 += carry1;\r | |
389 | z0 += ( z1 < (bits64)carry1 );\r | |
390 | z0 += carry0;\r | |
391 | *z2Ptr = z2;\r | |
392 | *z1Ptr = z1;\r | |
393 | *z0Ptr = z0;\r | |
394 | \r | |
395 | }\r | |
396 | \r | |
397 | /*\r | |
398 | -------------------------------------------------------------------------------\r | |
399 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the\r | |
400 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo\r | |
401 | 2^128, so any borrow out (carry out) is lost. The result is broken into two\r | |
402 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and\r | |
403 | `z1Ptr'.\r | |
404 | -------------------------------------------------------------------------------\r | |
405 | */\r | |
406 | INLINE void\r | |
407 | sub128(\r | |
408 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
409 | {\r | |
410 | \r | |
411 | *z1Ptr = a1 - b1;\r | |
412 | *z0Ptr = a0 - b0 - ( a1 < b1 );\r | |
413 | \r | |
414 | }\r | |
415 | \r | |
416 | /*\r | |
417 | -------------------------------------------------------------------------------\r | |
418 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'\r | |
419 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.\r | |
420 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The\r | |
421 | result is broken into three 64-bit pieces which are stored at the locations\r | |
422 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.\r | |
423 | -------------------------------------------------------------------------------\r | |
424 | */\r | |
425 | INLINE void\r | |
426 | sub192(\r | |
427 | bits64 a0,\r | |
428 | bits64 a1,\r | |
429 | bits64 a2,\r | |
430 | bits64 b0,\r | |
431 | bits64 b1,\r | |
432 | bits64 b2,\r | |
433 | bits64 *z0Ptr,\r | |
434 | bits64 *z1Ptr,\r | |
435 | bits64 *z2Ptr\r | |
436 | )\r | |
437 | {\r | |
438 | bits64 z0, z1, z2;\r | |
439 | int8 borrow0, borrow1;\r | |
440 | \r | |
441 | z2 = a2 - b2;\r | |
442 | borrow1 = ( a2 < b2 );\r | |
443 | z1 = a1 - b1;\r | |
444 | borrow0 = ( a1 < b1 );\r | |
445 | z0 = a0 - b0;\r | |
446 | z0 -= ( z1 < (bits64)borrow1 );\r | |
447 | z1 -= borrow1;\r | |
448 | z0 -= borrow0;\r | |
449 | *z2Ptr = z2;\r | |
450 | *z1Ptr = z1;\r | |
451 | *z0Ptr = z0;\r | |
452 | \r | |
453 | }\r | |
454 | \r | |
455 | /*\r | |
456 | -------------------------------------------------------------------------------\r | |
457 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken\r | |
458 | into two 64-bit pieces which are stored at the locations pointed to by\r | |
459 | `z0Ptr' and `z1Ptr'.\r | |
460 | -------------------------------------------------------------------------------\r | |
461 | */\r | |
462 | INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )\r | |
463 | {\r | |
464 | bits32 aHigh, aLow, bHigh, bLow;\r | |
465 | bits64 z0, zMiddleA, zMiddleB, z1;\r | |
466 | \r | |
467 | aLow = (bits32)a;\r | |
468 | aHigh = (bits32)(a>>32);\r | |
469 | bLow = (bits32)b;\r | |
470 | bHigh = (bits32)(b>>32);\r | |
471 | z1 = ( (bits64) aLow ) * bLow;\r | |
472 | zMiddleA = ( (bits64) aLow ) * bHigh;\r | |
473 | zMiddleB = ( (bits64) aHigh ) * bLow;\r | |
474 | z0 = ( (bits64) aHigh ) * bHigh;\r | |
475 | zMiddleA += zMiddleB;\r | |
476 | z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );\r | |
477 | zMiddleA <<= 32;\r | |
478 | z1 += zMiddleA;\r | |
479 | z0 += ( z1 < zMiddleA );\r | |
480 | *z1Ptr = z1;\r | |
481 | *z0Ptr = z0;\r | |
482 | \r | |
483 | }\r | |
484 | \r | |
485 | /*\r | |
486 | -------------------------------------------------------------------------------\r | |
487 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by\r | |
488 | `b' to obtain a 192-bit product. The product is broken into three 64-bit\r | |
489 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and\r | |
490 | `z2Ptr'.\r | |
491 | -------------------------------------------------------------------------------\r | |
492 | */\r | |
493 | INLINE void\r | |
494 | mul128By64To192(\r | |
495 | bits64 a0,\r | |
496 | bits64 a1,\r | |
497 | bits64 b,\r | |
498 | bits64 *z0Ptr,\r | |
499 | bits64 *z1Ptr,\r | |
500 | bits64 *z2Ptr\r | |
501 | )\r | |
502 | {\r | |
503 | bits64 z0, z1, z2, more1;\r | |
504 | \r | |
505 | mul64To128( a1, b, &z1, &z2 );\r | |
506 | mul64To128( a0, b, &z0, &more1 );\r | |
507 | add128( z0, more1, 0, z1, &z0, &z1 );\r | |
508 | *z2Ptr = z2;\r | |
509 | *z1Ptr = z1;\r | |
510 | *z0Ptr = z0;\r | |
511 | \r | |
512 | }\r | |
513 | \r | |
514 | /*\r | |
515 | -------------------------------------------------------------------------------\r | |
516 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the\r | |
517 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit\r | |
518 | product. The product is broken into four 64-bit pieces which are stored at\r | |
519 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.\r | |
520 | -------------------------------------------------------------------------------\r | |
521 | */\r | |
522 | INLINE void\r | |
523 | mul128To256(\r | |
524 | bits64 a0,\r | |
525 | bits64 a1,\r | |
526 | bits64 b0,\r | |
527 | bits64 b1,\r | |
528 | bits64 *z0Ptr,\r | |
529 | bits64 *z1Ptr,\r | |
530 | bits64 *z2Ptr,\r | |
531 | bits64 *z3Ptr\r | |
532 | )\r | |
533 | {\r | |
534 | bits64 z0, z1, z2, z3;\r | |
535 | bits64 more1, more2;\r | |
536 | \r | |
537 | mul64To128( a1, b1, &z2, &z3 );\r | |
538 | mul64To128( a1, b0, &z1, &more2 );\r | |
539 | add128( z1, more2, 0, z2, &z1, &z2 );\r | |
540 | mul64To128( a0, b0, &z0, &more1 );\r | |
541 | add128( z0, more1, 0, z1, &z0, &z1 );\r | |
542 | mul64To128( a0, b1, &more1, &more2 );\r | |
543 | add128( more1, more2, 0, z2, &more1, &z2 );\r | |
544 | add128( z0, z1, 0, more1, &z0, &z1 );\r | |
545 | *z3Ptr = z3;\r | |
546 | *z2Ptr = z2;\r | |
547 | *z1Ptr = z1;\r | |
548 | *z0Ptr = z0;\r | |
549 | \r | |
550 | }\r | |
551 | \r | |
552 | /*\r | |
553 | -------------------------------------------------------------------------------\r | |
554 | Returns an approximation to the 64-bit integer quotient obtained by dividing\r | |
555 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The\r | |
556 | divisor `b' must be at least 2^63. If q is the exact quotient truncated\r | |
557 | toward zero, the approximation returned lies between q and q + 2 inclusive.\r | |
558 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit\r | |
559 | unsigned integer is returned.\r | |
560 | -------------------------------------------------------------------------------\r | |
561 | */\r | |
562 | static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )\r | |
563 | {\r | |
564 | bits64 b0, b1;\r | |
565 | bits64 rem0, rem1, term0, term1;\r | |
566 | bits64 z;\r | |
567 | \r | |
568 | if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );\r | |
569 | b0 = b>>32;\r | |
570 | z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;\r | |
571 | mul64To128( b, z, &term0, &term1 );\r | |
572 | sub128( a0, a1, term0, term1, &rem0, &rem1 );\r | |
573 | while ( ( (sbits64) rem0 ) < 0 ) {\r | |
574 | z -= LIT64( 0x100000000 );\r | |
575 | b1 = b<<32;\r | |
576 | add128( rem0, rem1, b0, b1, &rem0, &rem1 );\r | |
577 | }\r | |
578 | rem0 = ( rem0<<32 ) | ( rem1>>32 );\r | |
579 | z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;\r | |
580 | return z;\r | |
581 | \r | |
582 | }\r | |
583 | \r | |
584 | #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)\r | |
585 | /*\r | |
586 | -------------------------------------------------------------------------------\r | |
587 | Returns an approximation to the square root of the 32-bit significand given\r | |
588 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of\r | |
589 | `aExp' (the least significant bit) is 1, the integer returned approximates\r | |
590 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'\r | |
591 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either\r | |
592 | case, the approximation returned lies strictly within +/-2 of the exact\r | |
593 | value.\r | |
594 | -------------------------------------------------------------------------------\r | |
595 | */\r | |
596 | static bits32 estimateSqrt32( int16 aExp, bits32 a )\r | |
597 | {\r | |
598 | static const bits16 sqrtOddAdjustments[] = {\r | |
599 | 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,\r | |
600 | 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67\r | |
601 | };\r | |
602 | static const bits16 sqrtEvenAdjustments[] = {\r | |
603 | 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,\r | |
604 | 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002\r | |
605 | };\r | |
606 | int8 idx;\r | |
607 | bits32 z;\r | |
608 | \r | |
609 | idx = ( a>>27 ) & 15;\r | |
610 | if ( aExp & 1 ) {\r | |
611 | z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];\r | |
612 | z = ( ( a / z )<<14 ) + ( z<<15 );\r | |
613 | a >>= 1;\r | |
614 | }\r | |
615 | else {\r | |
616 | z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];\r | |
617 | z = a / z + z;\r | |
618 | z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );\r | |
619 | if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 );\r | |
620 | }\r | |
621 | return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );\r | |
622 | \r | |
623 | }\r | |
624 | #endif\r | |
625 | \r | |
626 | /*\r | |
627 | -------------------------------------------------------------------------------\r | |
628 | Returns the number of leading 0 bits before the most-significant 1 bit of\r | |
629 | `a'. If `a' is zero, 32 is returned.\r | |
630 | -------------------------------------------------------------------------------\r | |
631 | */\r | |
632 | static int8 countLeadingZeros32( bits32 a )\r | |
633 | {\r | |
634 | static const int8 countLeadingZerosHigh[] = {\r | |
635 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,\r | |
636 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\r | |
637 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
638 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\r | |
639 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
640 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
641 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
642 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r | |
643 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
644 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
645 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
646 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
647 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
648 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
649 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r | |
650 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r | |
651 | };\r | |
652 | int8 shiftCount;\r | |
653 | \r | |
654 | shiftCount = 0;\r | |
655 | if ( a < 0x10000 ) {\r | |
656 | shiftCount += 16;\r | |
657 | a <<= 16;\r | |
658 | }\r | |
659 | if ( a < 0x1000000 ) {\r | |
660 | shiftCount += 8;\r | |
661 | a <<= 8;\r | |
662 | }\r | |
663 | shiftCount += countLeadingZerosHigh[ a>>24 ];\r | |
664 | return shiftCount;\r | |
665 | \r | |
666 | }\r | |
667 | \r | |
668 | /*\r | |
669 | -------------------------------------------------------------------------------\r | |
670 | Returns the number of leading 0 bits before the most-significant 1 bit of\r | |
671 | `a'. If `a' is zero, 64 is returned.\r | |
672 | -------------------------------------------------------------------------------\r | |
673 | */\r | |
674 | static int8 countLeadingZeros64( bits64 a )\r | |
675 | {\r | |
676 | int8 shiftCount;\r | |
677 | \r | |
678 | shiftCount = 0;\r | |
679 | if ( a < ( (bits64) 1 )<<32 ) {\r | |
680 | shiftCount += 32;\r | |
681 | }\r | |
682 | else {\r | |
683 | a >>= 32;\r | |
684 | }\r | |
685 | shiftCount += (int8)countLeadingZeros32( (bits32)a );\r | |
686 | return shiftCount;\r | |
687 | \r | |
688 | }\r | |
689 | \r | |
690 | /*\r | |
691 | -------------------------------------------------------------------------------\r | |
692 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'\r | |
693 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.\r | |
694 | Otherwise, returns 0.\r | |
695 | -------------------------------------------------------------------------------\r | |
696 | */\r | |
697 | INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )\r | |
698 | {\r | |
699 | \r | |
700 | return ( a0 == b0 ) && ( a1 == b1 );\r | |
701 | \r | |
702 | }\r | |
703 | \r | |
704 | /*\r | |
705 | -------------------------------------------------------------------------------\r | |
706 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less\r | |
707 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.\r | |
708 | Otherwise, returns 0.\r | |
709 | -------------------------------------------------------------------------------\r | |
710 | */\r | |
711 | INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )\r | |
712 | {\r | |
713 | \r | |
714 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );\r | |
715 | \r | |
716 | }\r | |
717 | \r | |
718 | /*\r | |
719 | -------------------------------------------------------------------------------\r | |
720 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less\r | |
721 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,\r | |
722 | returns 0.\r | |
723 | -------------------------------------------------------------------------------\r | |
724 | */\r | |
725 | INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )\r | |
726 | {\r | |
727 | \r | |
728 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );\r | |
729 | \r | |
730 | }\r | |
731 | \r | |
732 | /*\r | |
733 | -------------------------------------------------------------------------------\r | |
734 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is\r | |
735 | not equal to the 128-bit value formed by concatenating `b0' and `b1'.\r | |
736 | Otherwise, returns 0.\r | |
737 | -------------------------------------------------------------------------------\r | |
738 | */\r | |
739 | INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )\r | |
740 | {\r | |
741 | \r | |
742 | return ( a0 != b0 ) || ( a1 != b1 );\r | |
743 | \r | |
744 | }\r | |
745 | \r |