]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * fp_util.S | |
3 | * | |
4 | * Copyright Roman Zippel, 1997. All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, and the entire permission notice in its entirety, | |
11 | * including the disclaimer of warranties. | |
12 | * 2. Redistributions in binary form must reproduce the above copyright | |
13 | * notice, this list of conditions and the following disclaimer in the | |
14 | * documentation and/or other materials provided with the distribution. | |
15 | * 3. The name of the author may not be used to endorse or promote | |
16 | * products derived from this software without specific prior | |
17 | * written permission. | |
18 | * | |
19 | * ALTERNATIVELY, this product may be distributed under the terms of | |
20 | * the GNU General Public License, in which case the provisions of the GPL are | |
21 | * required INSTEAD OF the above restrictions. (This clause is | |
22 | * necessary due to a potential bad interaction between the GPL and | |
23 | * the restrictions contained in a BSD-style copyright.) | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | |
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | |
36 | */ | |
37 | ||
1da177e4 LT |
38 | #include "fp_emu.h" |
39 | ||
40 | /* | |
41 | * Here are lots of conversion and normalization functions mainly | |
42 | * used by fp_scan.S | |
43 | * Note that these functions are optimized for "normal" numbers, | |
44 | * these are handled first and exit as fast as possible, this is | |
45 | * especially important for fp_normalize_ext/fp_conv_ext2ext, as | |
46 | * it's called very often. | |
47 | * The register usage is tuned for fp_scan.S and for whichever registers | |
48 | * are unused there at the time of the call, so be careful if you want to | |
49 | * change something here. %d0 and %d1 are always usable, sometimes %d2 (or | |
50 | * only its lower half). Most functions have to return %a0 | |
51 | * unmodified, so that the caller can immediately reuse it. | |
52 | */ | |
53 | ||
54 | .globl fp_ill, fp_end | |
55 | ||
56 | | exits from fp_scan: | |
57 | | illegal instruction: emit a message through the printf macro and return | |
58 | fp_ill: | |
59 | printf ,"fp_illegal\n" | |
60 | rts | |
61 | | completed instruction: sanity-check three lwords around TASK_MM(%a2), leave with %d0 = 0 | |
62 | fp_end: | |
63 | tst.l (TASK_MM-8,%a2) | lword at TASK_MM-8 negative? |
64 | jmi 1f | yes, report it |
65 | tst.l (TASK_MM-4,%a2) | same test for TASK_MM-4 |
66 | jmi 1f | |
67 | tst.l (TASK_MM,%a2) | and for TASK_MM itself |
68 | jpl 2f | none negative: skip the report |
69 | 1: printf ,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM) | |
70 | 2: clr.l %d0 | %d0 = 0 on every path |
71 | rts | |
72 | ||
73 | .globl fp_conv_long2ext, fp_conv_single2ext | |
74 | .globl fp_conv_double2ext, fp_conv_ext2ext | |
75 | .globl fp_normalize_ext, fp_normalize_double | |
76 | .globl fp_normalize_single, fp_normalize_single_fast | |
77 | .globl fp_conv_ext2double, fp_conv_ext2single | |
78 | .globl fp_conv_ext2long, fp_conv_ext2short | |
79 | .globl fp_conv_ext2byte | |
80 | .globl fp_finalrounding_single, fp_finalrounding_single_fast | |
81 | .globl fp_finalrounding_double | |
82 | .globl fp_finalrounding, fp_finaltest, fp_final | |
83 | ||
84 | /* | |
85 | * First several conversion functions from a source operand | |
86 | * into the extended format. Note, that only fp_conv_ext2ext | |
87 | * normalizes the number and is always called after the other | |
88 | * conversion functions, which only move the information into | |
89 | * fp_ext structure. | |
90 | */ | |
91 | ||
92 | | fp_conv_long2ext: | |
93 | | store a signed long as sign / exponent 0x3fff+31 / magnitude, without normalizing | |
94 | | args: %d0 = source (32-bit long) | |
95 | | %a0 = destination (ptr to struct fp_ext), unchanged on return | |
96 | ||
97 | fp_conv_long2ext: | |
98 | printf PCONV,"l2e: %p -> %p(",2,%d0,%a0 | |
99 | clr.l %d1 | sign defaults to zero |
100 | tst.l %d0 | |
101 | jeq fp_l2e_zero | is source zero? |
102 | jpl 1f | positive? |
103 | moveq #1,%d1 | negative: sign = 1 ... |
104 | neg.l %d0 | ... and keep only the magnitude |
105 | 1: swap %d1 | move the sign into the upper word |
106 | move.w #0x3fff+31,%d1 | lower word: biased exponent 31 |
107 | move.l %d1,(%a0)+ | set sign / exp |
108 | move.l %d0,(%a0)+ | set mantissa |
109 | clr.l (%a0) | low lword of the mantissa = 0 |
110 | subq.l #8,%a0 | restore %a0 |
111 | printx PCONV,%a0@ | |
112 | printf PCONV,")\n" | |
113 | rts | |
114 | | source is zero: all three lwords of the destination are cleared | |
115 | fp_l2e_zero: | |
116 | clr.l (%a0)+ | sign / exp |
117 | clr.l (%a0)+ | high lword of mantissa |
118 | clr.l (%a0) | low lword of mantissa |
119 | subq.l #8,%a0 | restore %a0 |
120 | printx PCONV,%a0@ | |
121 | printf PCONV,")\n" | |
122 | rts | |
123 | ||
124 | | fp_conv_single2ext: unpack a single into sign / exponent / mantissa, no normalization | |
125 | | args: %d0 = source (single-precision fp value) | |
126 | | %a0 = dest (struct fp_ext *), unchanged on return | |
127 | ||
128 | fp_conv_single2ext: | |
129 | printf PCONV,"s2e: %p -> %p(",2,%d0,%a0 | |
130 | move.l %d0,%d1 | %d1 will hold sign / exponent |
131 | lsl.l #8,%d0 | shift mantissa |
132 | lsr.l #8,%d1 | exponent / sign |
133 | lsr.l #7,%d1 | sign now in bit 16, exponent in bits 8-15 |
134 | lsr.w #8,%d1 | exponent down to bits 0-7, upper word untouched |
135 | jeq fp_s2e_small | zero / denormal? |
136 | cmp.w #0xff,%d1 | NaN / Inf? |
137 | jeq fp_s2e_large | |
138 | bset #31,%d0 | set explicit bit |
139 | add.w #0x3fff-0x7f,%d1 | re-bias the exponent. |
140 | 9: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp |
141 | move.l %d0,(%a0)+ | high lword of fp_ext.mant |
142 | clr.l (%a0) | low lword = 0 |
143 | subq.l #8,%a0 | restore %a0 |
144 | printx PCONV,%a0@ | |
145 | printf PCONV,")\n" | |
146 | rts | |
147 | | zeros and denormalized | |
148 | fp_s2e_small: | |
149 | | exponent is zero, so explicit bit is already zero too | |
150 | tst.l %d0 | any mantissa bits left after the shift? |
151 | jeq 9b | no: store it with exponent 0 |
152 | move.w #0x4000-0x7f,%d1 | denormal: fixed exponent, mantissa left unnormalized |
153 | jra 9b | |
154 | | infinities and NAN | |
155 | fp_s2e_large: | |
156 | bclr #31,%d0 | clear explicit bit |
157 | move.w #0x7fff,%d1 | maximum extended exponent |
158 | jra 9b | |
159 | ||
160 | fp_conv_double2ext: | double fetched via getuser from (%a1) -> fp_ext at (%a0); %a1 advances by 8 |
161 | #ifdef FPU_EMU_DEBUG | |
162 | getuser.l %a1@(0),%d0,fp_err_ua2,%a1 | |
163 | getuser.l %a1@(4),%d1,fp_err_ua2,%a1 | |
164 | printf PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0 | |
165 | #endif | |
166 | getuser.l (%a1)+,%d0,fp_err_ua2,%a1 | high lword; presumably jumps to fp_err_ua2 on a fault (macro not shown) |
167 | move.l %d0,%d1 | |
168 | lsl.l #8,%d0 | shift high mantissa |
169 | lsl.l #3,%d0 | 11 bits in total |
170 | lsr.l #8,%d1 | exponent / sign |
171 | lsr.l #7,%d1 | sign now in bit 16, exponent in bits 5-15 |
172 | lsr.w #5,%d1 | exponent down to bits 0-10, upper word untouched |
173 | jeq fp_d2e_small | zero / denormal? |
174 | cmp.w #0x7ff,%d1 | NaN / Inf? |
175 | jeq fp_d2e_large | |
176 | bset #31,%d0 | set explicit bit |
177 | add.w #0x3fff-0x3ff,%d1 | re-bias the exponent. |
178 | 9: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp |
179 | move.l %d0,(%a0)+ | high lword of mantissa, low 11 bits still empty |
180 | getuser.l (%a1)+,%d0,fp_err_ua2,%a1 | low lword of the double |
181 | move.l %d0,%d1 | |
182 | lsl.l #8,%d0 | |
183 | lsl.l #3,%d0 | low lword shifted left by 11 |
184 | move.l %d0,(%a0) | becomes the low lword of the mantissa |
185 | moveq #21,%d0 | |
186 | lsr.l %d0,%d1 | its top 11 bits ... |
187 | or.l %d1,-(%a0) | ... fill the bottom of the high lword |
188 | subq.l #4,%a0 | restore %a0 |
189 | printx PCONV,%a0@ | |
190 | printf PCONV,")\n" | |
191 | rts | |
192 | | zeros and denormalized | |
193 | fp_d2e_small: | |
194 | | exponent is zero, so explicit bit is already zero too | |
195 | tst.l %d0 | only the high part of the mantissa is tested here |
196 | jeq 9b | |
197 | move.w #0x4000-0x3ff,%d1 | denormal: fixed exponent, mantissa left unnormalized |
198 | jra 9b | |
199 | | infinities and NAN | |
200 | fp_d2e_large: | |
201 | bclr #31,%d0 | clear explicit bit |
202 | move.w #0x7fff,%d1 | maximum extended exponent |
203 | jra 9b | |
204 | ||
205 | | fp_conv_ext2ext: | |
206 | | originally used to get longdouble from userspace, now it's | |
207 | | called before arithmetic operations to make sure the number | |
208 | | is normalized [maybe rename it?]. | |
209 | | args: %a0 = dest (struct fp_ext *), normalized in place | |
210 | | returns 0 in %d0 for a NaN, otherwise 1 | |
211 | ||
212 | fp_conv_ext2ext: | |
213 | printf PCONV,"e2e: %p(",1,%a0 | |
214 | printx PCONV,%a0@ | |
215 | printf PCONV,"), " | |
216 | move.l (%a0)+,%d0 | sign / exp; %a0 now points at the high mantissa lword |
217 | cmp.w #0x7fff,%d0 | Inf / NaN? |
218 | jeq fp_e2e_large | |
219 | move.l (%a0),%d0 | high lword of mantissa |
220 | jpl fp_e2e_small | zero / denorm? |
221 | | The high bit is set, so normalization is irrelevant. | |
222 | fp_e2e_checkround: | |
223 | subq.l #4,%a0 | back to the start of the struct |
224 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
225 | move.b (%a0),%d0 | extra precision byte (byte 0 of the struct) |
226 | jne fp_e2e_round | non-zero: the result has to be rounded |
227 | #endif | |
228 | printf PCONV,"%p(",1,%a0 | |
229 | printx PCONV,%a0@ | |
230 | printf PCONV,")\n" | |
231 | moveq #1,%d0 | not a NaN |
232 | rts | |
233 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
234 | fp_e2e_round: | %d0.b = extra precision byte, %a0 = start of struct |
235 | fp_set_sr FPSR_EXC_INEX2 | |
236 | clr.b (%a0) | the extra byte is consumed here |
237 | move.w (FPD_RND,FPDATA),%d2 | rounding mode |
238 | jne fp_e2e_roundother | %d2 == 0, round to nearest |
239 | tst.b %d0 | test guard bit |
240 | jpl 9f | zero is closer |
241 | btst #0,(11,%a0) | test lsb bit |
242 | jne fp_e2e_doroundup | round to infinity |
243 | lsl.b #1,%d0 | check low bits |
244 | jeq 9f | round to zero |
245 | fp_e2e_doroundup: | |
246 | addq.l #1,(8,%a0) | increment the low lword of the mantissa |
247 | jcc 9f | |
248 | addq.l #1,(4,%a0) | carry into the high lword |
249 | jcc 9f | |
250 | move.w #0x8000,(4,%a0) | mantissa wrapped: restore the top bit ... |
251 | addq.w #1,(2,%a0) | ... and bump the exponent |
252 | 9: printf PNORM,"%p(",1,%a0 | |
253 | printx PNORM,%a0@ | |
254 | printf PNORM,")\n" | |
255 | rts | NOTE(review): %d0 is not loaded with 1 on this exit - confirm callers |
256 | fp_e2e_roundother: | |
257 | subq.w #2,%d2 | |
258 | jcs 9b | %d2 < 2, round to zero |
259 | jhi 1f | %d2 > 2, round to +infinity |
260 | tst.b (1,%a0) | to -inf |
261 | jne fp_e2e_doroundup | negative, round to infinity |
262 | jra 9b | positive, round to zero |
263 | 1: tst.b (1,%a0) | to +inf |
264 | jeq fp_e2e_doroundup | positive, round to infinity |
265 | jra 9b | negative, round to zero |
266 | #endif | |
267 | | zeros and subnormals: | |
268 | | try to normalize these anyway. Entered with the flags of the high lword load. | |
269 | fp_e2e_small: | |
270 | jne fp_e2e_small1 | high lword zero? |
271 | move.l (4,%a0),%d0 | low lword of mantissa |
272 | jne fp_e2e_small2 | only the low lword holds bits |
273 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
274 | clr.l %d0 | |
275 | move.b (-4,%a0),%d0 | extra precision byte |
276 | jne fp_e2e_small3 | only the extra byte holds bits |
277 | #endif | |
278 | | Genuine zero. | |
279 | clr.w -(%a0) | exponent = 0 |
280 | subq.l #2,%a0 | restore %a0 |
281 | printf PNORM,"%p(",1,%a0 | |
282 | printx PNORM,%a0@ | |
283 | printf PNORM,")\n" | |
284 | moveq #1,%d0 | not a NaN |
285 | rts | |
286 | | definitely subnormal, need to shift all 64 bits | |
287 | fp_e2e_small1: | %d0 = high lword (non-zero, top bit clear) |
288 | bfffo %d0{#0,#32},%d1 | %d1 = number of leading zero bits |
289 | move.w -(%a0),%d2 | current exponent |
290 | sub.w %d1,%d2 | exponent after a full normalization |
291 | jcc 1f | no borrow: the exponent is large enough |
292 | | Pathologically small, denormalize. | |
293 | add.w %d2,%d1 | shift only by the old exponent |
294 | clr.w %d2 | and leave exponent 0 |
295 | 1: move.w %d2,(%a0)+ | store the new exponent |
296 | move.w %d1,%d2 | %d2 = shift count |
297 | jeq fp_e2e_checkround | nothing to shift |
298 | | fancy 64-bit double-shift begins here | |
299 | lsl.l %d2,%d0 | |
300 | move.l %d0,(%a0)+ | high lword, low bits still missing |
301 | move.l (%a0),%d0 | |
302 | move.l %d0,%d1 | keep a copy of the low lword |
303 | lsl.l %d2,%d0 | |
304 | move.l %d0,(%a0) | shifted low lword |
305 | neg.w %d2 | |
306 | and.w #0x1f,%d2 | %d2 = 32 - shift count |
307 | lsr.l %d2,%d1 | bits that crossed the lword boundary |
308 | or.l %d1,-(%a0) | merge them into the high lword |
309 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
310 | fp_e2e_extra1: | %d2 = 32 - shift count, %a0 = high lword |
311 | clr.l %d0 | |
312 | move.b (-4,%a0),%d0 | extra precision byte |
313 | neg.w %d2 | |
314 | add.w #24,%d2 | %d2 = shift count - 8, carry set if shift >= 8 |
315 | jcc 1f | |
316 | clr.b (-4,%a0) | shift >= 8: the whole byte moves into the mantissa |
317 | lsl.l %d2,%d0 | |
318 | or.l %d0,(4,%a0) | |
319 | jra fp_e2e_checkround | |
320 | 1: addq.w #8,%d2 | shift < 8: %d2 = shift count again |
321 | lsl.l %d2,%d0 | |
322 | move.b %d0,(-4,%a0) | low 8 bits stay in the extra byte |
323 | lsr.l #8,%d0 | |
324 | or.l %d0,(4,%a0) | the rest goes into the low lword |
325 | #endif | |
326 | jra fp_e2e_checkround | |
327 | | pathologically small subnormal: high lword is zero, %d0 = low lword (non-zero) | |
328 | fp_e2e_small2: | |
329 | bfffo %d0{#0,#32},%d1 | leading zero bits of the low lword |
330 | add.w #32,%d1 | plus the 32 zero bits of the high lword |
331 | move.w -(%a0),%d2 | current exponent |
332 | sub.w %d1,%d2 | exponent after a full normalization |
333 | jcc 1f | no borrow: the exponent is large enough |
334 | | Beyond pathologically small, denormalize. | |
335 | add.w %d2,%d1 | shift only by the old exponent |
336 | clr.w %d2 | and leave exponent 0 |
337 | 1: move.w %d2,(%a0)+ | store the new exponent |
338 | ext.l %d1 | |
339 | jeq fp_e2e_checkround | nothing to shift |
340 | clr.l (4,%a0) | low lword moves up, clear its old place |
341 | sub.w #32,%d1 | fix: adjust the shift count (not the exponent in %d2), as fp_ne_small2 does |
342 | jcs 1f | shift < 32: value straddles both lwords |
343 | lsl.l %d1,%d0 | lower lword needs only to be shifted |
344 | move.l %d0,(%a0) | into the higher lword |
345 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
346 | clr.l %d0 | |
347 | move.b (-4,%a0),%d0 | extra precision byte |
348 | clr.b (-4,%a0) | |
349 | neg.w %d1 | |
350 | add.w #32,%d1 | bit offset just below the shifted lword |
351 | bfins %d0,(%a0){%d1,#8} | insert the extra byte there |
352 | #endif | |
353 | jra fp_e2e_checkround | |
354 | 1: neg.w %d1 | lower lword is split between |
355 | bfins %d0,(%a0){%d1,#32} | higher and lower lword |
356 | #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC | |
357 | jra fp_e2e_checkround | |
358 | #else | |
359 | move.w %d1,%d2 | %d2 = 32 - shift count, as fp_e2e_extra1 expects |
360 | jra fp_e2e_extra1 | |
361 | | These are extremely small numbers, that will mostly end up as zero | |
362 | | anyway, so this is only important for correct rounding. | |
363 | fp_e2e_small3: | both mantissa lwords are zero, %d0 = extra byte (non-zero) |
364 | bfffo %d0{#24,#8},%d1 | position of the first set bit in the low byte |
365 | add.w #40,%d1 | total shift count to normalize |
366 | move.w -(%a0),%d2 | current exponent |
367 | sub.w %d1,%d2 | |
368 | jcc 1f | |
369 | | Pathologically small, denormalize. | |
370 | add.w %d2,%d1 | shift only by the old exponent |
371 | clr.w %d2 | |
372 | 1: move.w %d2,(%a0)+ | store the new exponent |
373 | ext.l %d1 | |
374 | jeq fp_e2e_checkround | nothing to shift |
375 | cmp.w #8,%d1 | |
376 | jcs 2f | shift < 8: part of the byte stays behind |
377 | 1: clr.b (-4,%a0) | the whole byte moves into the mantissa |
378 | sub.w #64,%d1 | |
379 | jcs 1f | shift < 64: place it with bfins |
380 | add.w #24,%d1 | |
381 | lsl.l %d1,%d0 | |
382 | move.l %d0,(%a0) | byte ends up in the high lword |
383 | jra fp_e2e_checkround | |
384 | 1: neg.w %d1 | bit offset = 64 - shift count |
385 | bfins %d0,(%a0){%d1,#8} | |
386 | jra fp_e2e_checkround | |
387 | 2: lsl.l %d1,%d0 | |
388 | move.b %d0,(-4,%a0) | low 8 bits stay in the extra byte |
389 | lsr.l #8,%d0 | |
390 | move.b %d0,(7,%a0) | the rest becomes the lowest mantissa byte |
391 | jra fp_e2e_checkround | |
392 | #endif | |
393 | 1: move.l %d0,%d1 | NOTE(review): appears unreachable - follows a jra and no 1f/1b resolves here |
394 | lsl.l %d2,%d0 | (older split of the lower lword between both lwords) |
395 | move.l %d0,(%a0) | |
396 | move.l %d1,%d0 | |
397 | neg.w %d2 | |
398 | add.w #32,%d2 | |
399 | lsr.l %d2,%d0 | |
400 | move.l %d0,-(%a0) | |
401 | jra fp_e2e_checkround | |
402 | | Infinities and NaNs | |
403 | fp_e2e_large: | %a0 = high lword of mantissa |
404 | move.l (%a0)+,%d0 | |
405 | jne 3f | high lword not empty, look closer |
406 | 1: tst.l (%a0) | low lword |
407 | jne 4f | any mantissa bit set: NaN |
408 | moveq #1,%d0 | infinity, not a NaN |
409 | 2: subq.l #8,%a0 | restore %a0 |
410 | printf PCONV,"%p(",1,%a0 | |
411 | printx PCONV,%a0@ | |
412 | printf PCONV,")\n" | |
413 | rts | |
414 | | we have maybe a NaN, shift off the highest bit | |
415 | 3: lsl.l #1,%d0 | |
416 | jeq 1b | only the top bit was set, check the low lword |
417 | | we have a NaN, clear the return value | |
418 | 4: clrl %d0 | |
419 | jra 2b | |
420 | ||
421 | ||
422 | /* | |
423 | * Normalization functions. Call these on the output of general | |
424 | * FP operators, and before any conversion into the destination | |
425 | * formats. fp_normalize_ext has always to be called first, the | |
426 | * following conversion functions expect an already normalized | |
427 | * number. | |
428 | */ | |
429 | ||
430 | | fp_normalize_ext: | |
431 | | normalize an extended in extended (unpacked) format, basically | |
432 | | it does the same as fp_conv_ext2ext, additionally it also does | |
433 | | the necessary postprocessing checks (sets UNFL / SNAN / INEX2). | |
434 | | args: %a0 (struct fp_ext *) | |
435 | | NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2 | |
436 | ||
437 | fp_normalize_ext: | |
438 | printf PNORM,"ne: %p(",1,%a0 | |
439 | printx PNORM,%a0@ | |
440 | printf PNORM,"), " | |
441 | move.l (%a0)+,%d0 | sign / exp; %a0 now points at the high mantissa lword |
442 | cmp.w #0x7fff,%d0 | Inf / NaN? |
443 | jeq fp_ne_large | |
444 | move.l (%a0),%d0 | high lword of mantissa |
445 | jpl fp_ne_small | zero / denorm? |
446 | | The high bit is set, so normalization is irrelevant. | |
447 | fp_ne_checkround: | |
448 | subq.l #4,%a0 | back to the start of the struct |
449 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
450 | move.b (%a0),%d0 | extra precision byte (byte 0 of the struct) |
451 | jne fp_ne_round | non-zero: the result has to be rounded |
452 | #endif | |
453 | printf PNORM,"%p(",1,%a0 | |
454 | printx PNORM,%a0@ | |
455 | printf PNORM,")\n" | |
456 | rts | |
457 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
458 | fp_ne_round: | %d0.b = extra precision byte, %a0 = start of struct |
459 | fp_set_sr FPSR_EXC_INEX2 | |
460 | clr.b (%a0) | the extra byte is consumed here |
461 | move.w (FPD_RND,FPDATA),%d2 | rounding mode |
462 | jne fp_ne_roundother | %d2 == 0, round to nearest |
463 | tst.b %d0 | test guard bit |
464 | jpl 9f | zero is closer |
465 | btst #0,(11,%a0) | test lsb bit |
466 | jne fp_ne_doroundup | round to infinity |
467 | lsl.b #1,%d0 | check low bits |
468 | jeq 9f | round to zero |
469 | fp_ne_doroundup: | |
470 | addq.l #1,(8,%a0) | increment the low lword of the mantissa |
471 | jcc 9f | |
472 | addq.l #1,(4,%a0) | carry into the high lword |
473 | jcc 9f | |
474 | addq.w #1,(2,%a0) | mantissa wrapped: bump the exponent ... |
475 | move.w #0x8000,(4,%a0) | ... and restore the top bit |
476 | 9: printf PNORM,"%p(",1,%a0 | |
477 | printx PNORM,%a0@ | |
478 | printf PNORM,")\n" | |
479 | rts | |
480 | fp_ne_roundother: | |
481 | subq.w #2,%d2 | |
482 | jcs 9b | %d2 < 2, round to zero |
483 | jhi 1f | %d2 > 2, round to +infinity |
484 | tst.b (1,%a0) | to -inf |
485 | jne fp_ne_doroundup | negative, round to infinity |
486 | jra 9b | positive, round to zero |
487 | 1: tst.b (1,%a0) | to +inf |
488 | jeq fp_ne_doroundup | positive, round to infinity |
489 | jra 9b | negative, round to zero |
490 | #endif | |
491 | | Zeros and subnormal numbers | |
492 | | These are probably merely subnormal, rather than "denormalized" | |
493 | | numbers, so we will try to make them normal again. | |
494 | fp_ne_small: | entered with the flags of the high lword load |
495 | jne fp_ne_small1 | high lword zero? |
496 | move.l (4,%a0),%d0 | low lword of mantissa |
497 | jne fp_ne_small2 | only the low lword holds bits |
498 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
499 | clr.l %d0 | |
500 | move.b (-4,%a0),%d0 | extra precision byte |
501 | jne fp_ne_small3 | only the extra byte holds bits |
502 | #endif | |
503 | | Genuine zero. | |
504 | clr.w -(%a0) | exponent = 0 |
505 | subq.l #2,%a0 | restore %a0 |
506 | printf PNORM,"%p(",1,%a0 | |
507 | printx PNORM,%a0@ | |
508 | printf PNORM,")\n" | |
509 | rts | |
510 | | Subnormal: high lword is non-zero, shift all 64 bits. | |
511 | fp_ne_small1: | |
512 | bfffo %d0{#0,#32},%d1 | %d1 = number of leading zero bits |
513 | move.w -(%a0),%d2 | current exponent |
514 | sub.w %d1,%d2 | exponent after a full normalization |
515 | jcc 1f | no borrow: the exponent is large enough |
516 | | Pathologically small, denormalize. | |
517 | add.w %d2,%d1 | shift only by the old exponent |
518 | clr.w %d2 | and leave exponent 0 |
519 | fp_set_sr FPSR_EXC_UNFL | |
520 | 1: move.w %d2,(%a0)+ | store the new exponent |
521 | move.w %d1,%d2 | %d2 = shift count |
522 | jeq fp_ne_checkround | nothing to shift |
523 | | This is exactly the same 64-bit double shift as seen above. | |
524 | lsl.l %d2,%d0 | |
525 | move.l %d0,(%a0)+ | high lword, low bits still missing |
526 | move.l (%a0),%d0 | |
527 | move.l %d0,%d1 | keep a copy of the low lword |
528 | lsl.l %d2,%d0 | |
529 | move.l %d0,(%a0) | shifted low lword |
530 | neg.w %d2 | |
531 | and.w #0x1f,%d2 | %d2 = 32 - shift count |
532 | lsr.l %d2,%d1 | bits that crossed the lword boundary |
533 | or.l %d1,-(%a0) | merge them into the high lword |
534 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
535 | fp_ne_extra1: | %d2 = 32 - shift count, %a0 = high lword |
536 | clr.l %d0 | |
537 | move.b (-4,%a0),%d0 | extra precision byte |
538 | neg.w %d2 | |
539 | add.w #24,%d2 | %d2 = shift count - 8, carry set if shift >= 8 |
540 | jcc 1f | |
541 | clr.b (-4,%a0) | shift >= 8: the whole byte moves into the mantissa |
542 | lsl.l %d2,%d0 | |
543 | or.l %d0,(4,%a0) | |
544 | jra fp_ne_checkround | |
545 | 1: addq.w #8,%d2 | shift < 8: %d2 = shift count again |
546 | lsl.l %d2,%d0 | |
547 | move.b %d0,(-4,%a0) | low 8 bits stay in the extra byte |
548 | lsr.l #8,%d0 | |
549 | or.l %d0,(4,%a0) | the rest goes into the low lword |
550 | #endif | |
551 | jra fp_ne_checkround | |
552 | | May or may not be subnormal, if so, only 32 bits to shift. | |
553 | fp_ne_small2: | high lword is zero, %d0 = low lword (non-zero) |
554 | bfffo %d0{#0,#32},%d1 | leading zero bits of the low lword |
555 | add.w #32,%d1 | plus the 32 zero bits of the high lword |
556 | move.w -(%a0),%d2 | current exponent |
557 | sub.w %d1,%d2 | exponent after a full normalization |
558 | jcc 1f | no borrow: the exponent is large enough |
559 | | Beyond pathologically small, denormalize. | |
560 | add.w %d2,%d1 | shift only by the old exponent |
561 | clr.w %d2 | and leave exponent 0 |
562 | fp_set_sr FPSR_EXC_UNFL | |
563 | 1: move.w %d2,(%a0)+ | store the new exponent |
564 | ext.l %d1 | |
565 | jeq fp_ne_checkround | nothing to shift |
566 | clr.l (4,%a0) | low lword moves up, clear its old place |
567 | sub.w #32,%d1 | shift count relative to the high lword |
568 | jcs 1f | shift < 32: value straddles both lwords |
569 | lsl.l %d1,%d0 | lower lword needs only to be shifted |
570 | move.l %d0,(%a0) | into the higher lword |
571 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | |
572 | clr.l %d0 | |
573 | move.b (-4,%a0),%d0 | extra precision byte |
574 | clr.b (-4,%a0) | |
575 | neg.w %d1 | |
576 | add.w #32,%d1 | bit offset just below the shifted lword |
577 | bfins %d0,(%a0){%d1,#8} | insert the extra byte there |
578 | #endif | |
579 | jra fp_ne_checkround | |
580 | 1: neg.w %d1 | lower lword is split between |
581 | bfins %d0,(%a0){%d1,#32} | higher and lower lword |
582 | #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC | |
583 | jra fp_ne_checkround | |
584 | #else | |
585 | move.w %d1,%d2 | %d2 = 32 - shift count, as fp_ne_extra1 expects |
586 | jra fp_ne_extra1 | |
587 | | These are extremely small numbers, that will mostly end up as zero | |
588 | | anyway, so this is only important for correct rounding. | |
589 | fp_ne_small3: | both mantissa lwords are zero, %d0 = extra byte (non-zero) |
590 | bfffo %d0{#24,#8},%d1 | position of the first set bit in the low byte |
591 | add.w #40,%d1 | total shift count to normalize |
592 | move.w -(%a0),%d2 | current exponent |
593 | sub.w %d1,%d2 | |
594 | jcc 1f | |
595 | | Pathologically small, denormalize. | |
596 | add.w %d2,%d1 | shift only by the old exponent |
597 | clr.w %d2 | NOTE(review): unlike small1/small2, UNFL is not set here - confirm |
598 | 1: move.w %d2,(%a0)+ | store the new exponent |
599 | ext.l %d1 | |
600 | jeq fp_ne_checkround | nothing to shift |
601 | cmp.w #8,%d1 | |
602 | jcs 2f | shift < 8: part of the byte stays behind |
603 | 1: clr.b (-4,%a0) | the whole byte moves into the mantissa |
604 | sub.w #64,%d1 | |
605 | jcs 1f | shift < 64: place it with bfins |
606 | add.w #24,%d1 | |
607 | lsl.l %d1,%d0 | |
608 | move.l %d0,(%a0) | byte ends up in the high lword |
609 | jra fp_ne_checkround | |
610 | 1: neg.w %d1 | bit offset = 64 - shift count |
611 | bfins %d0,(%a0){%d1,#8} | |
612 | jra fp_ne_checkround | |
613 | 2: lsl.l %d1,%d0 | |
614 | move.b %d0,(-4,%a0) | low 8 bits stay in the extra byte |
615 | lsr.l #8,%d0 | |
616 | move.b %d0,(7,%a0) | the rest becomes the lowest mantissa byte |
617 | jra fp_ne_checkround | |
618 | #endif | |
619 | | Infinities and NaNs, again, same as above. | |
620 | fp_ne_large: | %a0 = high lword of mantissa |
621 | move.l (%a0)+,%d0 | |
622 | jne 3f | high lword not empty, look closer |
623 | 1: tst.l (%a0) | low lword |
624 | jne 4f | any mantissa bit set: NaN |
625 | 2: subq.l #8,%a0 | restore %a0 |
626 | printf PNORM,"%p(",1,%a0 | |
627 | printx PNORM,%a0@ | |
628 | printf PNORM,")\n" | |
629 | rts | |
630 | | we have maybe a NaN, shift off the highest bit | |
631 | 3: move.l %d0,%d1 | |
632 | lsl.l #1,%d1 | |
633 | jne 4f | bits below the top one: NaN |
634 | clr.l (-4,%a0) | only the top bit was set: clear it in memory |
635 | jra 1b | and let the low lword decide |
636 | | we have a NaN, test if it is signaling | |
637 | 4: bset #30,%d0 | Z reflects the old state of bit 30 |
638 | jne 2b | bit was already set: nothing more to do |
639 | fp_set_sr FPSR_EXC_SNAN | |
640 | move.l %d0,(-4,%a0) | write the high lword back with bit 30 set |
641 | jra 2b | |
642 | ||
643 | | these next two do rounding as per the IEEE standard. | |
644 | | values for the rounding modes appear to be: | |
645 | | 0: Round to nearest | |
646 | | 1: Round to zero | |
647 | | 2: Round to -Infinity | |
648 | | 3: Round to +Infinity | |
649 | | both functions expect that fp_normalize_ext was already | |
650 | | called (so the extended argument is already normalized | |
651 | | as far as possible); they are used when a different | |
652 | | rounding precision is selected and before converting | |
653 | | into single/double | |
654 | ||
655 | | fp_normalize_double: | |
656 | | normalize an extended with double (52-bit) precision | |
657 | | args: %a0 (struct fp_ext *), restored before every return | |
658 | ||
659 | fp_normalize_double: | |
660 | printf PNORM,"nd: %p(",1,%a0 | |
661 | printx PNORM,%a0@ | |
662 | printf PNORM,"), " | |
663 | move.l (%a0)+,%d2 | sign in the upper word, exponent in the lower |
664 | tst.w %d2 | |
665 | jeq fp_nd_zero | zero / denormalized |
666 | cmp.w #0x7fff,%d2 | |
667 | jeq fp_nd_huge | NaN / infinity. |
668 | sub.w #0x4000-0x3ff,%d2 | will the exponent fit? |
669 | jcs fp_nd_small | too small. |
670 | cmp.w #0x7fe,%d2 | |
671 | jcc fp_nd_large | too big. |
672 | addq.l #4,%a0 | skip the high lword |
673 | move.l (%a0),%d0 | low lword of mantissa |
674 | | now, round off the low 11 bits. | |
675 | fp_nd_round: | %d0 = low lword, %a0 points at it |
676 | moveq #21,%d1 | |
677 | lsl.l %d1,%d0 | keep 11 low bits. |
678 | jne fp_nd_checkround | Are they non-zero? |
679 | | nothing to do here | |
680 | 9: subq.l #8,%a0 | restore %a0 |
681 | printf PNORM,"%p(",1,%a0 | |
682 | printx PNORM,%a0@ | |
683 | printf PNORM,")\n" | |
684 | rts | |
685 | | Be careful with the X bit! It contains the lsb | |
686 | | from the shift above, it is needed for round to nearest. | |
687 | fp_nd_checkround: | %d0 = the 11 discarded bits, left-aligned |
688 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit |
689 | and.w #0xf800,(2,%a0) | clear bits 0-10 |
690 | move.w (FPD_RND,FPDATA),%d2 | rounding mode |
691 | jne 2f | %d2 == 0, round to nearest |
692 | tst.l %d0 | test guard bit |
693 | jpl 9b | zero is closer |
694 | | here we test the X bit by adding it to %d2 | |
695 | clr.w %d2 | first set z bit, addx only clears it |
696 | addx.w %d2,%d2 | test lsb bit |
697 | | IEEE754-specified "round to even" behaviour. If the guard | |
698 | | bit is set, then the number is odd, so rounding works like | |
699 | | in grade-school arithmetic (i.e. 1.5 rounds to 2.0) | |
700 | | Otherwise, an equal distance rounds towards zero, so as not | |
701 | | to produce an odd number. This is strange, but it is what | |
702 | | the standard says. | |
703 | jne fp_nd_doroundup | round to infinity |
704 | lsl.l #1,%d0 | check low bits |
705 | jeq 9b | round to zero |
706 | fp_nd_doroundup: | |
707 | | round (the mantissa, that is) towards infinity | |
708 | add.l #0x800,(%a0) | add one unit in the last kept bit (bit 11) |
709 | jcc 9b | no overflow, good. |
710 | addq.l #1,-(%a0) | extend to high lword |
711 | jcc 1f | no overflow, good. |
712 | | Yow! we have managed to overflow the mantissa. Since this | |
713 | | only happens when %d1 was 0xfffff800, it is now zero, so | |
714 | | reset the high bit, and increment the exponent. | |
715 | move.w #0x8000,(%a0) | |
716 | addq.w #1,-(%a0) | |
717 | cmp.w #0x43ff,(%a0)+ | exponent now overflown? |
718 | jeq fp_nd_large | yes, so make it infinity. |
719 | 1: subq.l #4,%a0 | restore %a0 (it points at the high lword here) |
720 | printf PNORM,"%p(",1,%a0 | |
721 | printx PNORM,%a0@ | |
722 | printf PNORM,")\n" | |
723 | rts | |
724 | 2: subq.w #2,%d2 | |
725 | jcs 9b | %d2 < 2, round to zero |
726 | jhi 3f | %d2 > 2, round to +infinity |
727 | | Round to +Inf or -Inf. High word of %d2 contains the | |
728 | | sign of the number, by the way. | |
729 | swap %d2 | to -inf |
730 | tst.b %d2 | |
731 | jne fp_nd_doroundup | negative, round to infinity |
732 | jra 9b | positive, round to zero |
733 | 3: swap %d2 | to +inf |
734 | tst.b %d2 | |
735 | jeq fp_nd_doroundup | positive, round to infinity |
736 | jra 9b | negative, round to zero |
737 | | Exponent underflow. Try to make a denormal, and set it to | |
738 | | the smallest possible fraction if this fails. | |
739 | fp_nd_small: | %d2 = exponent - 0x3c01 (negative), %a0 = high lword |
740 | fp_set_sr FPSR_EXC_UNFL | set UNFL bit |
741 | move.w #0x3c01,(-2,%a0) | 2**-1022 |
742 | neg.w %d2 | degree of underflow |
743 | cmp.w #32,%d2 | single or double shift? |
744 | jcc 1f | |
745 | | Again, another 64-bit double shift. | |
746 | move.l (%a0),%d0 | |
747 | move.l %d0,%d1 | keep a copy of the high lword |
748 | lsr.l %d2,%d0 | |
749 | move.l %d0,(%a0)+ | shifted high lword |
750 | move.l (%a0),%d0 | |
751 | lsr.l %d2,%d0 | |
752 | neg.w %d2 | |
753 | add.w #32,%d2 | %d2 = 32 - shift count |
754 | lsl.l %d2,%d1 | bits that crossed the lword boundary |
755 | or.l %d1,%d0 | |
756 | move.l (%a0),%d1 | old low lword, for the sticky test |
757 | move.l %d0,(%a0) | new low lword |
758 | | Check to see if we shifted off any significant bits | |
759 | lsl.l %d2,%d1 | |
760 | jeq fp_nd_round | Nope, round. |
761 | bset #0,%d0 | Yes, so set the "sticky bit". |
762 | jra fp_nd_round | Now, round. |
763 | | Another 64-bit single shift and store | |
764 | 1: sub.w #32,%d2 | the high lword moves down by a whole lword first |
765 | cmp.w #32,%d2 | Do we really need to shift? |
766 | jcc 2f | No, the number is too small. |
767 | move.l (%a0),%d0 | |
768 | clr.l (%a0)+ | high lword becomes zero |
769 | move.l %d0,%d1 | |
770 | lsr.l %d2,%d0 | |
771 | neg.w %d2 | |
772 | add.w #32,%d2 | %d2 = 32 - remaining shift count |
773 | | Again, check to see if we shifted off any significant bits. | |
774 | tst.l (%a0) | old low lword is dropped completely |
775 | jeq 1f | |
776 | bset #0,%d0 | Sticky bit. |
777 | 1: move.l %d0,(%a0) | |
778 | lsl.l %d2,%d1 | bits shifted out of the old high lword |
779 | jeq fp_nd_round | |
780 | bset #0,%d0 | sticky bit, in %d0 only |
781 | jra fp_nd_round | |
782 | | Sorry, the number is just too small. | |
783 | 2: clr.l (%a0)+ | |
784 | clr.l (%a0) | |
785 | moveq #1,%d0 | Smallest possible fraction, |
786 | jra fp_nd_round | round as desired. |
787 | | zero and denormalized | |
788 | fp_nd_zero: | %a0 = high lword of mantissa |
789 | tst.l (%a0)+ | |
790 | jne 1f | |
791 | tst.l (%a0) | |
792 | jne 1f | |
793 | subq.l #8,%a0 | restore %a0 |
794 | printf PNORM,"%p(",1,%a0 | |
795 | printx PNORM,%a0@ | |
796 | printf PNORM,")\n" | |
797 | rts | zero. nothing to do. |
798 | | These are not merely subnormal numbers, but true denormals, | |
799 | | i.e. pathologically small (exponent is 2**-16383) numbers. | |
800 | | It is clearly impossible for even a normal extended number | |
801 | | with that exponent to fit into double precision, so just | |
802 | | write these ones off as "too darn small". | |
803 | 1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit |
804 | clr.l (%a0) | low lword |
805 | clr.l -(%a0) | high lword |
806 | move.w #0x3c01,-(%a0) | i.e. 2**-1022 |
807 | addq.l #6,%a0 | back to the low lword, as fp_nd_round expects |
808 | moveq #1,%d0 | a lone sticky bit to round from |
809 | jra fp_nd_round | round. |
810 | | Exponent overflow. Just call it infinity. | |
811 | fp_nd_large: | %a0 = high lword of mantissa |
812 | move.w #0x7ff,%d0 | |
813 | and.w (6,%a0),%d0 | low 11 bits of the mantissa |
814 | jeq 1f | |
815 | fp_set_sr FPSR_EXC_INEX2 | |
816 | 1: fp_set_sr FPSR_EXC_OVFL | |
817 | move.w (FPD_RND,FPDATA),%d2 | rounding mode |
818 | jne 3f | %d2 = 0 round to nearest |
819 | 1: move.w #0x7fff,(-2,%a0) | infinity: maximum exponent, |
820 | clr.l (%a0)+ | zero mantissa |
821 | clr.l (%a0) | |
822 | 2: subq.l #8,%a0 | restore %a0 |
823 | printf PNORM,"%p(",1,%a0 | |
824 | printx PNORM,%a0@ | |
825 | printf PNORM,")\n" | |
826 | rts | |
827 | 3: subq.w #2,%d2 | |
828 | jcs 5f | %d2 < 2, round to zero |
829 | jhi 4f | %d2 > 2, round to +infinity |
830 | tst.b (-3,%a0) | to -inf |
831 | jne 1b | negative: infinity |
832 | jra 5f | |
833 | 4: tst.b (-3,%a0) | to +inf |
834 | jeq 1b | positive: infinity |
835 | 5: move.w #0x43fe,(-2,%a0) | otherwise the largest exponent that still fits |
836 | moveq #-1,%d0 | |
837 | move.l %d0,(%a0)+ | high lword all ones |
838 | move.w #0xf800,%d0 | %d0 = 0xfffff800 |
839 | move.l %d0,(%a0) | low lword: ones down to bit 11 |
840 | jra 2b | |
841 | | Infinities or NaNs: left untouched | |
842 | fp_nd_huge: | |
843 | subq.l #4,%a0 | restore %a0 |
844 | printf PNORM,"%p(",1,%a0 | |
845 | printx PNORM,%a0@ | |
846 | printf PNORM,")\n" | |
847 | rts | |
848 | ||
849 | | fp_normalize_single: | |
850 | | normalize an extended with single (23-bit) precision | |
851 | | args: %a0 (struct fp_ext *) | |
852 | ||
853 | fp_normalize_single: | |
854 | printf PNORM,"ns: %p(",1,%a0 | |
855 | printx PNORM,%a0@ | |
856 | printf PNORM,") " | |
857 | addq.l #2,%a0 | |
858 | move.w (%a0)+,%d2 | |
859 | jeq fp_ns_zero | zero / denormalized | |
860 | cmp.w #0x7fff,%d2 | |
861 | jeq fp_ns_huge | NaN / infinitive. | |
862 | sub.w #0x4000-0x7f,%d2 | will the exponent fit? | |
863 | jcs fp_ns_small | too small. | |
864 | cmp.w #0xfe,%d2 | |
865 | jcc fp_ns_large | too big. | |
866 | move.l (%a0)+,%d0 | get high lword of mantissa | |
867 | fp_ns_round: | |
868 | tst.l (%a0) | check the low lword | |
869 | jeq 1f | |
870 | | Set a sticky bit if it is non-zero. This should only | |
871 | | affect the rounding in what would otherwise be equal- | |
872 | | distance situations, which is what we want it to do. | |
873 | bset #0,%d0 | |
874 | 1: clr.l (%a0) | zap it from memory. | |
875 | | now, round off the low 8 bits of the hi lword. | |
876 | tst.b %d0 | 8 low bits. | |
877 | jne fp_ns_checkround | Are they non-zero? | |
878 | | nothing to do here | |
879 | subq.l #8,%a0 | |
880 | printf PNORM,"%p(",1,%a0 | |
881 | printx PNORM,%a0@ | |
882 | printf PNORM,")\n" | |
883 | rts | |
884 | fp_ns_checkround: | |
885 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
886 | clr.b -(%a0) | clear low byte of high lword | |
887 | subq.l #3,%a0 | |
888 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
889 | jne 2f | %d2 == 0, round to nearest | |
890 | tst.b %d0 | test guard bit | |
891 | jpl 9f | zero is closer | |
892 | btst #8,%d0 | test lsb bit | |
893 | | round to even behaviour, see above. | |
894 | jne fp_ns_doroundup | round to infinity | |
895 | lsl.b #1,%d0 | check low bits | |
896 | jeq 9f | round to zero | |
897 | fp_ns_doroundup: | |
898 | | round (the mantissa, that is) towards infinity | |
899 | add.l #0x100,(%a0) | |
900 | jcc 9f | no overflow, good. | |
901 | | Overflow. This means that the %d1 was 0xffffff00, so it | |
902 | | is now zero. We will set the mantissa to reflect this, and | |
903 | | increment the exponent (checking for overflow there too) | |
904 | move.w #0x8000,(%a0) | |
905 | addq.w #1,-(%a0) | |
906 | cmp.w #0x407f,(%a0)+ | exponent now overflown? | |
907 | jeq fp_ns_large | yes, so make it infinity. | |
908 | 9: subq.l #4,%a0 | |
909 | printf PNORM,"%p(",1,%a0 | |
910 | printx PNORM,%a0@ | |
911 | printf PNORM,")\n" | |
912 | rts | |
913 | | check nondefault rounding modes | |
914 | 2: subq.w #2,%d2 | |
915 | jcs 9b | %d2 < 2, round to zero | |
916 | jhi 3f | %d2 > 2, round to +infinity | |
917 | tst.b (-3,%a0) | to -inf | |
918 | jne fp_ns_doroundup | negative, round to infinity | |
919 | jra 9b | positive, round to zero | |
920 | 3: tst.b (-3,%a0) | to +inf | |
921 | jeq fp_ns_doroundup | positive, round to infinity | |
922 | jra 9b | negative, round to zero | |
923 | | Exponent underflow. Try to make a denormal, and set it to | |
924 | | the smallest possible fraction if this fails. | |
925 | fp_ns_small: | |
926 | fp_set_sr FPSR_EXC_UNFL | set UNFL bit | |
927 | move.w #0x3f81,(-2,%a0) | 2**-126 | |
928 | neg.w %d2 | degree of underflow | |
929 | cmp.w #32,%d2 | single or double shift? | |
930 | jcc 2f | |
931 | | a 32-bit shift. | |
932 | move.l (%a0),%d0 | |
933 | move.l %d0,%d1 | |
934 | lsr.l %d2,%d0 | |
935 | move.l %d0,(%a0)+ | |
936 | | Check to see if we shifted off any significant bits. | |
937 | neg.w %d2 | |
938 | add.w #32,%d2 | |
939 | lsl.l %d2,%d1 | |
940 | jeq 1f | |
941 | bset #0,%d0 | Sticky bit. | |
942 | | Check the lower lword | |
943 | 1: tst.l (%a0) | |
944 | jeq fp_ns_round | |
945 | clr (%a0) | |
946 | bset #0,%d0 | Sticky bit. | |
947 | jra fp_ns_round | |
948 | | Sorry, the number is just too small. | |
949 | 2: clr.l (%a0)+ | |
950 | clr.l (%a0) | |
951 | moveq #1,%d0 | Smallest possible fraction, | |
952 | jra fp_ns_round | round as desired. | |
953 | | Exponent overflow. Just call it infinity. | |
954 | fp_ns_large: | |
955 | tst.b (3,%a0) | |
956 | jeq 1f | |
957 | fp_set_sr FPSR_EXC_INEX2 | |
958 | 1: fp_set_sr FPSR_EXC_OVFL | |
959 | move.w (FPD_RND,FPDATA),%d2 | |
960 | jne 3f | %d2 = 0 round to nearest | |
961 | 1: move.w #0x7fff,(-2,%a0) | |
962 | clr.l (%a0)+ | |
963 | clr.l (%a0) | |
964 | 2: subq.l #8,%a0 | |
965 | printf PNORM,"%p(",1,%a0 | |
966 | printx PNORM,%a0@ | |
967 | printf PNORM,")\n" | |
968 | rts | |
969 | 3: subq.w #2,%d2 | |
970 | jcs 5f | %d2 < 2, round to zero | |
971 | jhi 4f | %d2 > 2, round to +infinity | |
972 | tst.b (-3,%a0) | to -inf | |
973 | jne 1b | |
974 | jra 5f | |
975 | 4: tst.b (-3,%a0) | to +inf | |
976 | jeq 1b | |
977 | 5: move.w #0x407e,(-2,%a0) | |
978 | move.l #0xffffff00,(%a0)+ | |
979 | clr.l (%a0) | |
980 | jra 2b | |
981 | | zero and denormalized | |
982 | fp_ns_zero: | |
983 | tst.l (%a0)+ | |
984 | jne 1f | |
985 | tst.l (%a0) | |
986 | jne 1f | |
987 | subq.l #8,%a0 | |
988 | printf PNORM,"%p(",1,%a0 | |
989 | printx PNORM,%a0@ | |
990 | printf PNORM,")\n" | |
991 | rts | zero. nothing to do. | |
992 | | These are not merely subnormal numbers, but true denormals, | |
993 | | i.e. pathologically small (exponent is 2**-16383) numbers. | |
994 | | It is clearly impossible for even a normal extended number | |
995 | | with that exponent to fit into single precision, so just | |
996 | | write these ones off as "too darn small". | |
997 | 1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit | |
998 | clr.l (%a0) | |
999 | clr.l -(%a0) | |
1000 | move.w #0x3f81,-(%a0) | i.e. 2**-126 | |
1001 | addq.l #6,%a0 | |
1002 | moveq #1,%d0 | |
1003 | jra fp_ns_round | round. | |
1004 | | Infinities or NaNs | |
1005 | fp_ns_huge: | |
1006 | subq.l #4,%a0 | |
1007 | printf PNORM,"%p(",1,%a0 | |
1008 | printx PNORM,%a0@ | |
1009 | printf PNORM,")\n" | |
1010 | rts | |
1011 | ||
1012 | | fp_normalize_single_fast: | |
1013 | | normalize an extended with single (23-bit) precision | |
1014 | | this is only used by fsgldiv/fsglmul, where the | |
1015 | | operand is not completely normalized. | |
1016 | | args: %a0 (struct fp_ext *), restored on return; %d0/%d2 are scratch | |
1017 ||
1018 | fp_normalize_single_fast: | |
1019 | printf PNORM,"nsf: %p(",1,%a0 | |
1020 | printx PNORM,%a0@ | |
1021 | printf PNORM,") " | |
1022 | addq.l #2,%a0 | skip to the exponent word | |
1023 | move.w (%a0)+,%d2 | exponent | |
1024 | cmp.w #0x7fff,%d2 | |
1025 | jeq fp_nsf_huge | NaN / infinity. | |
1026 | move.l (%a0)+,%d0 | get high lword of mantissa | |
1027 | fp_nsf_round: | |
1028 | tst.l (%a0) | check the low lword | |
1029 | jeq 1f | |
1030 | | Set a sticky bit if it is non-zero. This should only | |
1031 | | affect the rounding in what would otherwise be equal- | |
1032 | | distance situations, which is what we want it to do. | |
1033 | bset #0,%d0 | |
1034 | 1: clr.l (%a0) | zap it from memory. | |
1035 | | now, round off the low 8 bits of the hi lword. | |
1036 | tst.b %d0 | 8 low bits. | |
1037 | jne fp_nsf_checkround | Are they non-zero? | |
1038 | | nothing to do here | |
1039 | subq.l #8,%a0 | back to the start of the operand | |
1040 | printf PNORM,"%p(",1,%a0 | |
1041 | printx PNORM,%a0@ | |
1042 | printf PNORM,")\n" | |
1043 | rts | |
1044 | fp_nsf_checkround: | |
1045 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
1046 | clr.b -(%a0) | clear low byte of high lword | |
1047 | subq.l #3,%a0 | %a0 -> high lword of mantissa | |
1048 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1049 | jne 2f | %d2 == 0, round to nearest | |
1050 | tst.b %d0 | test guard bit | |
1051 | jpl 9f | zero is closer | |
1052 | btst #8,%d0 | test lsb bit | |
1053 | | round to even behaviour, see above. | |
1054 | jne fp_nsf_doroundup | round to infinity | |
1055 | lsl.b #1,%d0 | check low bits | |
1056 | jeq 9f | round to zero | |
1057 | fp_nsf_doroundup: | |
1058 | | round (the mantissa, that is) towards infinity | |
1059 | add.l #0x100,(%a0) | |
1060 | jcc 9f | no overflow, good. | |
1061 | | Overflow. The high lword of the mantissa was 0xffffff00, so it | |
1062 | | is now zero. We will set the mantissa to reflect this, and | |
1063 | | increment the exponent (checking for overflow there too) | |
1064 | move.w #0x8000,(%a0) | high lword becomes 0x80000000 | |
1065 | addq.w #1,-(%a0) | bump the exponent | |
1066 | cmp.w #0x407f,(%a0)+ | exponent now overflown? | |
1067 | jeq fp_nsf_large | yes, so make it infinity. | |
1068 | 9: subq.l #4,%a0 | back to the start of the operand | |
1069 | printf PNORM,"%p(",1,%a0 | |
1070 | printx PNORM,%a0@ | |
1071 | printf PNORM,")\n" | |
1072 | rts | |
1073 | | check nondefault rounding modes | |
1074 | 2: subq.w #2,%d2 | |
1075 | jcs 9b | %d2 < 2, round to zero | |
1076 | jhi 3f | %d2 > 2, round to +infinity | |
1077 | tst.b (-3,%a0) | to -inf | |
1078 | jne fp_nsf_doroundup | negative, round to infinity | |
1079 | jra 9b | positive, round to zero | |
1080 | 3: tst.b (-3,%a0) | to +inf | |
1081 | jeq fp_nsf_doroundup | positive, round to infinity | |
1082 | jra 9b | negative, round to zero | |
1083 | | Exponent overflow. Just call it infinity. | |
1084 | fp_nsf_large: | |
1085 | tst.b (3,%a0) | anything left in the low byte of the high lword? | |
1086 | jeq 1f | |
1087 | fp_set_sr FPSR_EXC_INEX2 | |
1088 | 1: fp_set_sr FPSR_EXC_OVFL | |
1089 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1090 | jne 3f | %d2 = 0 round to nearest | |
1091 | 1: move.w #0x7fff,(-2,%a0) | infinity: maximum exponent ... | |
1092 | clr.l (%a0)+ | ... and an all-zero mantissa | |
1093 | clr.l (%a0) | |
1094 | 2: subq.l #8,%a0 | back to the start of the operand | |
1095 | printf PNORM,"%p(",1,%a0 | |
1096 | printx PNORM,%a0@ | |
1097 | printf PNORM,")\n" | |
1098 | rts | |
1099 | 3: subq.w #2,%d2 | |
1100 | jcs 5f | %d2 < 2, round to zero | |
1101 | jhi 4f | %d2 > 2, round to +infinity | |
1102 | tst.b (-3,%a0) | to -inf | |
1103 | jne 1b | negative, becomes infinity | |
1104 | jra 5f | positive, largest finite value | |
1105 | 4: tst.b (-3,%a0) | to +inf | |
1106 | jeq 1b | positive, becomes infinity | |
1107 | 5: move.w #0x407e,(-2,%a0) | largest exponent below the 0x407f limit | |
1108 | move.l #0xffffff00,(%a0)+ | all 24 kept mantissa bits set | |
1109 | clr.l (%a0) | |
1110 | jra 2b | |
1111 | | Infinities or NaNs | |
1112 | fp_nsf_huge: | |
1113 | subq.l #4,%a0 | back to the start, operand left untouched | |
1114 | printf PNORM,"%p(",1,%a0 | |
1115 | printx PNORM,%a0@ | |
1116 | printf PNORM,")\n" | |
1117 | rts | |
1118 | ||
1119 | | conv_ext2int (macro): | |
1120 | | Generates a subroutine that converts an extended value to an | |
1121 | | integer of a given size, again, with the appropriate type of | |
1122 | | rounding. | |
1123 ||
1124 | | Macro arguments: | |
1125 | | s: size, as given in an assembly instruction. | |
1126 | | b: number of bits in that size. | |
1127 ||
1128 | | Subroutine arguments: | |
1129 | | %a0: source (struct fp_ext *), not restored on return | |
1130 ||
1131 | | Returns the integer in %d0 (like it should); %d1/%d2 are scratch. | |
1132 ||
1133 | .macro conv_ext2int s,b | |
1134 | .set inf,(1<<(\b-1))-1 | i.e. MAXINT | |
1135 | printf PCONV,"e2i%d: %p(",2,#\b,%a0 | |
1136 | printx PCONV,%a0@ | |
1137 | printf PCONV,") " | |
1138 | addq.l #2,%a0 | skip to the exponent word | |
1139 | move.w (%a0)+,%d2 | exponent | |
1140 | jeq fp_e2i_zero\b | zero / denorm (== 0, here) | |
1141 | cmp.w #0x7fff,%d2 | |
1142 | jeq fp_e2i_huge\b | Inf / NaN | |
1143 | sub.w #0x3ffe,%d2 | %d2 = number of integer bits | |
1144 | jcs fp_e2i_small\b | magnitude below 0.5 | |
1145 | cmp.w #\b,%d2 | |
1146 | jhi fp_e2i_large\b | more integer bits than the result holds | |
1147 | move.l (%a0),%d0 | high lword of mantissa | |
1148 | move.l %d0,%d1 | |
1149 | lsl.l %d2,%d1 | %d1 = fraction bits of the high lword | |
1150 | jne fp_e2i_round\b | |
1151 | tst.l (4,%a0) | fraction bits in the low lword? | |
1152 | jne fp_e2i_round\b | |
1153 | neg.w %d2 | |
1154 | add.w #32,%d2 | %d2 = 32 - integer bits | |
1155 | lsr.l %d2,%d0 | exact integer magnitude | |
1156 | 9: tst.w (-4,%a0) | negative? | |
1157 | jne 1f | |
1158 | tst.\s %d0 | positive: top bit set means it does not fit | |
1159 | jmi fp_e2i_large\b | |
1160 | printf PCONV,"-> %p\n",1,%d0 | |
1161 | rts | |
1162 | 1: neg.\s %d0 | apply the sign | |
1163 | jeq 1f | zero is fine | |
1164 | jpl fp_e2i_large\b | still positive after negation: overflow | |
1165 | 1: printf PCONV,"-> %p\n",1,%d0 | |
1166 | rts | |
1167 | fp_e2i_round\b: | |
1168 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | |
1169 | neg.w %d2 | |
1170 | add.w #32,%d2 | %d2 = 32 - integer bits | |
1171 | .if \b>16 | |
1172 | jeq 5f | no shift: guard bit is in the low lword | |
1173 | .endif | |
1174 | lsr.l %d2,%d0 | truncated integer magnitude | |
1175 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1176 | jne 2f | %d2 == 0, round to nearest | |
1177 | tst.l %d1 | test guard bit | |
1178 | jpl 9b | zero is closer | |
1179 | btst %d2,%d0 | test lsb bit (%d2 still 0) | |
1180 | jne fp_e2i_doroundup\b | |
1181 | lsl.l #1,%d1 | check low bits | |
1182 | jne fp_e2i_doroundup\b | |
1183 | tst.l (4,%a0) | low lword acts as sticky bits | |
1184 | jeq 9b | |
1185 | fp_e2i_doroundup\b: | |
1186 | addq.l #1,%d0 | |
1187 | jra 9b | |
1188 | | check nondefault rounding modes | |
1189 | 2: subq.w #2,%d2 | |
1190 | jcs 9b | %d2 < 2, round to zero | |
1191 | jhi 3f | %d2 > 2, round to +infinity | |
1192 | tst.w (-4,%a0) | to -inf | |
1193 | jne fp_e2i_doroundup\b | negative, round to infinity | |
1194 | jra 9b | positive, round to zero | |
1195 | 3: tst.w (-4,%a0) | to +inf | |
1196 | jeq fp_e2i_doroundup\b | positive, round to infinity | |
1197 | jra 9b | negative, round to zero | |
1198 | | we only want -2**31 to get correctly rounded here, | |
1199 | | since the guard bit is in the lower lword. | |
1200 | | everything else ends up anyway as overflow. | |
1201 | .if \b>16 | |
1202 | 5: move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1203 | jne 2b | %d2 == 0, round to nearest | |
1204 | move.l (4,%a0),%d1 | test guard bit | |
1205 | jpl 9b | zero is closer | |
1206 | lsl.l #1,%d1 | check low bits | |
1207 | jne fp_e2i_doroundup\b | |
1208 | jra 9b | |
1209 | .endif | |
1210 | fp_e2i_zero\b: | |
1211 | clr.l %d0 | |
1212 | tst.l (%a0)+ | high lword of mantissa | |
1213 | jne 1f | |
1214 | tst.l (%a0) | low lword of mantissa | |
1215 | jeq 3f | true zero, return 0 | |
1216 | 1: subq.l #4,%a0 | %a0 -> high lword again for the sign tests | |
1217 | fp_clr_sr FPSR_EXC_UNFL | fp_normalize_ext has set this bit | |
1218 | fp_e2i_small\b: | |
1219 | fp_set_sr FPSR_EXC_INEX2 | |
1220 | clr.l %d0 | |
1221 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | |
1222 | subq.w #2,%d2 | |
1223 | jcs 3f | %d2 < 2, round to nearest/zero | |
1224 | jhi 2f | %d2 > 2, round to +infinity | |
1225 | tst.w (-4,%a0) | to -inf | |
1226 | jeq 3f | positive, result is 0 | |
1227 | subq.\s #1,%d0 | negative, result is -1 | |
1228 | jra 3f | |
1229 | 2: tst.w (-4,%a0) | to +inf | |
1230 | jne 3f | negative, result is 0 | |
1231 | addq.\s #1,%d0 | positive, result is 1 | |
1232 | 3: printf PCONV,"-> %p\n",1,%d0 | |
1233 | rts | |
1234 | fp_e2i_large\b: | |
1235 | fp_set_sr FPSR_EXC_OPERR | |
1236 | move.\s #inf,%d0 | saturate to MAXINT | |
1237 | tst.w (-4,%a0) | |
1238 | jeq 1f | |
1239 | addq.\s #1,%d0 | negative: MAXINT+1 wraps to MININT | |
1240 | 1: printf PCONV,"-> %p\n",1,%d0 | |
1241 | rts | |
1242 | fp_e2i_huge\b: | |
1243 | move.\s (%a0),%d0 | result for a NaN: top of the mantissa | |
1244 | tst.l (%a0) | high lword of mantissa | |
1245 | jne 1f | non-zero, it is a NaN | |
1246 | tst.l (4,%a0) | low lword of mantissa | |
1247 | jeq fp_e2i_large\b | all zero: infinity, treat as overflow | |
1248 | | fp_normalize_ext has set this bit already | |
1249 | | and made the number nonsignaling | |
1250 | 1: fp_tst_sr FPSR_EXC_SNAN | |
1251 | jne 1f | |
1252 | fp_set_sr FPSR_EXC_OPERR | |
1253 | 1: printf PCONV,"-> %p\n",1,%d0 | |
1254 | rts | |
1255 | .endm | |
1256 | ||
1257 | fp_conv_ext2long: | |
1258 | conv_ext2int l,32 | extended -> 32-bit integer in %d0 | |
1259 ||
1260 | fp_conv_ext2short: | |
1261 | conv_ext2int w,16 | extended -> 16-bit integer in %d0 | |
1262 ||
1263 | fp_conv_ext2byte: | |
1264 | conv_ext2int b,8 | extended -> 8-bit integer in %d0 | |
1265 | ||
1266 | fp_conv_ext2double: | |
1267 | jsr fp_normalize_double | |
1268 | printf PCONV,"e2d: %p(",1,%a0 | |
1269 | printx PCONV,%a0@ | |
1270 | printf PCONV,"), " | |
1271 | move.l (%a0)+,%d2 | first lword, exponent in the low word | |
1272 | cmp.w #0x7fff,%d2 | Inf / NaN? | |
1273 | jne 1f | |
1274 | move.w #0x7ff,%d2 | yes, use the all-ones 11-bit exponent | |
1275 | move.l (%a0)+,%d0 | high lword of mantissa | |
1276 | jra 2f | |
1277 | 1: sub.w #0x3fff-0x3ff,%d2 | rebias the exponent | |
1278 | move.l (%a0)+,%d0 | high lword of mantissa | |
1279 | jmi 2f | top mantissa bit set, keep the exponent | |
1280 | clr.w %d2 | otherwise the exponent field becomes 0 | |
1281 | 2: lsl.w #5,%d2 | 11 exponent bits to the top of the low word | |
1282 | lsl.l #7,%d2 | then 15 more in two steps, so bit 16 of | |
1283 | lsl.l #8,%d2 | the first lword lands in bit 31 | |
1284 | move.l %d0,%d1 | keep a copy for the second output lword | |
1285 | lsl.l #1,%d0 | drop the top mantissa bit | |
1286 | lsr.l #4,%d0 | shift right by 12 in two steps, leaving | |
1287 | lsr.l #8,%d0 | 20 mantissa bits in bits 19-0 | |
1288 | or.l %d2,%d0 | merge with the exponent bits | |
1289 | putuser.l %d0,(%a1)+,fp_err_ua2,%a1 | first lword of the result | |
1290 | moveq #21,%d0 | |
1291 | lsl.l %d0,%d1 | low 11 bits of the high lword to bits 31-21 | |
1292 | move.l (%a0),%d0 | low lword of mantissa | |
1293 | lsr.l #4,%d0 | shift right by 11 in two steps, leaving | |
1294 | lsr.l #7,%d0 | its top 21 bits in bits 20-0 | |
1295 | or.l %d1,%d0 | |
1296 | putuser.l %d0,(%a1),fp_err_ua2,%a1 | second lword of the result | |
1297 | #ifdef FPU_EMU_DEBUG | |
1298 | getuser.l %a1@(-4),%d0,fp_err_ua2,%a1 | |
1299 | getuser.l %a1@(0),%d1,fp_err_ua2,%a1 | |
1300 | printf PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1 | |
1301 | #endif | |
1302 | rts | |
1303 | ||
1304 | fp_conv_ext2single: | |
1305 | jsr fp_normalize_single | |
1306 | printf PCONV,"e2s: %p(",1,%a0 | |
1307 | printx PCONV,%a0@ | |
1308 | printf PCONV,"), " | |
1309 | move.l (%a0)+,%d1 | first lword, exponent in the low word | |
1310 | cmp.w #0x7fff,%d1 | Inf / NaN? | |
1311 | jne 1f | |
1312 | move.w #0xff,%d1 | yes, use the all-ones 8-bit exponent | |
1313 | move.l (%a0)+,%d0 | high lword of mantissa | |
1314 | jra 2f | |
1315 | 1: sub.w #0x3fff-0x7f,%d1 | rebias the exponent | |
1316 | move.l (%a0)+,%d0 | high lword of mantissa | |
1317 | jmi 2f | top mantissa bit set, keep the exponent | |
1318 | clr.w %d1 | otherwise the exponent field becomes 0 | |
1319 | 2: lsl.w #8,%d1 | 8 exponent bits to the top of the low word | |
1320 | lsl.l #7,%d1 | then 15 more in two steps, so bit 16 of | |
1321 | lsl.l #8,%d1 | the first lword lands in bit 31 | |
1322 | bclr #31,%d0 | drop the top mantissa bit | |
1323 | lsr.l #8,%d0 | leaving 23 mantissa bits in bits 22-0 | |
1324 | or.l %d1,%d0 | result is returned in %d0 | |
1325 | printf PCONV,"%08x\n",1,%d0 | |
1326 | rts | |
1327 | ||
1328 | | special return addresses for instr that | |
1329 | | encode the rounding precision in the opcode | |
1330 | | (e.g. fsmove,fdmove) | |
1331 ||
1332 | fp_finalrounding_single: | |
1333 | addq.l #8,%sp | drop 8 bytes from the stack | |
1334 | jsr fp_normalize_ext | |
1335 | jsr fp_normalize_single | always round to single | |
1336 | jra fp_finaltest | |
1337 ||
1338 | fp_finalrounding_single_fast: | |
1339 | addq.l #8,%sp | drop 8 bytes from the stack | |
1340 | jsr fp_normalize_ext | |
1341 | jsr fp_normalize_single_fast | single rounding without the exponent range checks | |
1342 | jra fp_finaltest | |
1343 ||
1344 | fp_finalrounding_double: | |
1345 | addq.l #8,%sp | drop 8 bytes from the stack | |
1346 | jsr fp_normalize_ext | |
1347 | jsr fp_normalize_double | always round to double | |
1348 | jra fp_finaltest | |
1349 | ||
1350 | | fp_finalrounding / fp_finaltest: | |
1351 | | set the emulated status register based on the outcome of an | |
1352 | | emulated instruction. | |
1353 ||
1354 | fp_finalrounding: | |
1355 | addq.l #8,%sp | drop 8 bytes from the stack | |
1356 | | printf ,"f: %p\n",1,%a0 | |
1357 | jsr fp_normalize_ext | |
1358 | move.w (FPD_PREC,FPDATA),%d0 | rounding precision | |
1359 | subq.w #1,%d0 | |
1360 | jcs fp_finaltest | precision 0, no further rounding | |
1361 | jne 1f | precision above 1, round to double | |
1362 | jsr fp_normalize_single | precision 1, round to single | |
1363 | jra 2f | |
1364 | 1: jsr fp_normalize_double | |
1365 | 2:| printf ,"f: %p\n",1,%a0 | |
1366 | fp_finaltest: | |
1367 | | First, we do some of the obvious tests for the exception | |
1368 | | status byte and condition code bytes of fp_sr here, so that | |
1369 | | they do not have to be handled individually by every | |
1370 | | emulated instruction. | |
1371 | clr.l %d0 | %d0 collects the condition code bits | |
1372 | addq.l #1,%a0 | skip to the sign byte | |
1373 | tst.b (%a0)+ | sign | |
1374 | jeq 1f | |
1375 | bset #FPSR_CC_NEG-24,%d0 | N bit | |
1376 | 1: cmp.w #0x7fff,(%a0)+ | exponent | |
1377 | jeq 2f | |
1378 | | test for zero | |
1379 | moveq #FPSR_CC_Z-24,%d1 | |
1380 | tst.l (%a0)+ | high lword of mantissa | |
1381 | jne 9f | |
1382 | tst.l (%a0) | low lword of mantissa | |
1383 | jne 9f | |
1384 | jra 8f | mantissa is all zero, set Z | |
1385 | | infinity and NaN | |
1386 | 2: moveq #FPSR_CC_NAN-24,%d1 | |
1387 | move.l (%a0)+,%d2 | |
1388 | lsl.l #1,%d2 | ignore high bit | |
1389 | jne 8f | |
1390 | tst.l (%a0) | |
1391 | jne 8f | |
1392 | moveq #FPSR_CC_INF-24,%d1 | mantissa is zero, so it is an infinity | |
1393 | 8: bset %d1,%d0 | |
1394 | 9: move.b %d0,(FPD_FPSR+0,FPDATA) | set condition test result | |
1395 | | move instructions enter here | |
1396 | | Here, we test things in the exception status byte, and set | |
1397 | | other things in the accrued exception byte accordingly. | |
1398 | | Emulated instructions can set various things in the former, | |
1399 | | as defined in fp_emu.h. | |
1400 | fp_final: | |
1401 | move.l (FPD_FPSR,FPDATA),%d0 | |
1402 | #if 0 | |
1403 | btst #FPSR_EXC_SNAN,%d0 | EXC_SNAN | |
1404 | jne 1f | |
1405 | btst #FPSR_EXC_OPERR,%d0 | EXC_OPERR | |
1406 | jeq 2f | |
1407 | 1: bset #FPSR_AEXC_IOP,%d0 | set IOP bit | |
1408 | 2: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL | |
1409 | jeq 1f | |
1410 | bset #FPSR_AEXC_OVFL,%d0 | set OVFL bit | |
1411 | 1: btst #FPSR_EXC_UNFL,%d0 | EXC_UNFL | |
1412 | jeq 1f | |
1413 | btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2 | |
1414 | jeq 1f | |
1415 | bset #FPSR_AEXC_UNFL,%d0 | set UNFL bit | |
1416 | 1: btst #FPSR_EXC_DZ,%d0 | EXC_DZ | |
1417 | jeq 1f | |
1418 | bset #FPSR_AEXC_DZ,%d0 | set DZ bit | |
1419 | 1: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL | |
1420 | jne 1f | |
1421 | btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2 | |
1422 | jne 1f | |
1423 | btst #FPSR_EXC_INEX1,%d0 | EXC_INEX1 | |
1424 | jeq 2f | |
1425 | 1: bset #FPSR_AEXC_INEX,%d0 | set INEX bit | |
1426 | 2: move.l %d0,(FPD_FPSR,FPDATA) | |
1427 | #else | |
1428 | | same as above, greatly optimized, but untested (yet) | |
1429 | move.l %d0,%d2 | %d2 keeps the unmodified status register | |
1430 | lsr.l #5,%d0 | |
1431 | move.l %d0,%d1 | |
1432 | lsr.l #4,%d1 | |
1433 | or.l %d0,%d1 | |
1434 | and.b #0x08,%d1 | keep only bit 3 of this partial result | |
1435 | move.l %d2,%d0 | |
1436 | lsr.l #6,%d0 | |
1437 | or.l %d1,%d0 | |
1438 | move.l %d2,%d1 | |
1439 | lsr.l #4,%d1 | |
1440 | or.b #0xdf,%d1 | mask that can only clear bit 5 | |
1441 | and.b %d1,%d0 | |
1442 | move.l %d2,%d1 | |
1443 | lsr.l #7,%d1 | |
1444 | and.b #0x80,%d1 | keep only bit 7 of this partial result | |
1445 | or.b %d1,%d0 | |
1446 | and.b #0xf8,%d0 | only bits 7-3 are merged into the low byte | |
1447 | or.b %d0,%d2 | bits are only ever added, never cleared | |
1448 | move.l %d2,(FPD_FPSR,FPDATA) | |
1449 | #endif | |
1450 | move.b (FPD_FPSR+2,FPDATA),%d0 | third byte of the status register | |
1451 | and.b (FPD_FPCR+2,FPDATA),%d0 | masked with the same byte of the control register | |
1452 | jeq 1f | |
1453 | printf ,"send signal!!!\n" | |
1454 | 1: jra fp_end | |