]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | .file "wm_sqrt.S" |
2 | /*---------------------------------------------------------------------------+ | |
3 | | wm_sqrt.S | | |
4 | | | | |
5 | | Fixed point arithmetic square root evaluation. | | |
6 | | | | |
7 | | Copyright (C) 1992,1993,1995,1997 | | |
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | |
9 | | Australia. E-mail billm@suburbia.net | | |
10 | | | | |
11 | | Call from C as: | | |
12 | | int wm_sqrt(FPU_REG *n, unsigned int control_word) | | |
13 | | | | |
14 | +---------------------------------------------------------------------------*/ | |
15 | ||
16 | /*---------------------------------------------------------------------------+ | |
17 | | wm_sqrt(FPU_REG *n, unsigned int control_word) | | |
18 | | returns the square root of n in n. | | |
19 | | | | |
20 | | Use Newton's method to compute the square root of a number, which must | | |
21 | | be in the range [1.0 .. 4.0), to 64 bits accuracy. | | |
22 | | Does not check the sign or tag of the argument. | | |
23 | | Sets the exponent, but not the sign or tag of the result. | | |
24 | | | | |
25 | | The guess is kept in %esi:%edi | | |
26 | +---------------------------------------------------------------------------*/ | |
27 | ||
28 | #include "exception.h" | |
29 | #include "fpu_emu.h" | |
30 | ||
31 | ||
32 | #ifndef NON_REENTRANT_FPU | |
33 | /* Local storage on the stack: */ | |
34 | #define FPU_accum_3 -4(%ebp) /* ms word */ | |
35 | #define FPU_accum_2 -8(%ebp) | |
36 | #define FPU_accum_1 -12(%ebp) | |
37 | #define FPU_accum_0 -16(%ebp) | |
38 | ||
39 | /* | |
40 | * The de-normalised argument: | |
41 | * sq_2 sq_1 sq_0 | |
42 | * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | |
43 | * ^ binary point here | |
44 | */ | |
45 | #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ | |
46 | #define FPU_fsqrt_arg_1 -24(%ebp) | |
47 | #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ | |
48 | ||
49 | #else | |
50 | /* Local storage in a static area: */ | |
51 | .data | |
52 | .align 4,0 | |
53 | FPU_accum_3: | |
54 | .long 0 /* ms word */ | |
55 | FPU_accum_2: | |
56 | .long 0 | |
57 | FPU_accum_1: | |
58 | .long 0 | |
59 | FPU_accum_0: | |
60 | .long 0 | |
61 | ||
62 | /* The de-normalised argument: | |
63 | sq_2 sq_1 sq_0 | |
64 | b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | |
65 | ^ binary point here | |
66 | */ | |
67 | FPU_fsqrt_arg_2: | |
68 | .long 0 /* ms word */ | |
69 | FPU_fsqrt_arg_1: | |
70 | .long 0 | |
71 | FPU_fsqrt_arg_0: | |
72 | .long 0 /* ls word, at most the ms bit is set */ | |
73 | #endif /* NON_REENTRANT_FPU */ | |
74 | ||
75 | ||
76 | .text | |
77 | ENTRY(wm_sqrt) | |
78 | pushl %ebp | |
79 | movl %esp,%ebp | |
80 | #ifndef NON_REENTRANT_FPU | |
81 | subl $28,%esp | |
82 | #endif /* NON_REENTRANT_FPU */ | |
83 | pushl %esi | |
84 | pushl %edi | |
85 | pushl %ebx | |
86 | ||
87 | movl PARAM1,%esi | |
88 | ||
89 | movl SIGH(%esi),%eax | |
90 | movl SIGL(%esi),%ecx | |
91 | xorl %edx,%edx | |
92 | ||
93 | /* We use a rough linear estimate for the first guess.. */ | |
94 | ||
95 | cmpw EXP_BIAS,EXP(%esi) | |
96 | jnz sqrt_arg_ge_2 | |
97 | ||
98 | shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ | |
99 | rcrl $1,%ecx | |
100 | rcrl $1,%edx | |
101 | ||
102 | sqrt_arg_ge_2: | |
103 | /* From here on, n is never accessed directly again until it is | |
104 | replaced by the answer. */ | |
105 | ||
106 | movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ | |
107 | movl %ecx,FPU_fsqrt_arg_1 | |
108 | movl %edx,FPU_fsqrt_arg_0 | |
109 | ||
110 | /* Make a linear first estimate */ | |
111 | shrl $1,%eax | |
112 | addl $0x40000000,%eax | |
113 | movl $0xaaaaaaaa,%ecx | |
114 | mull %ecx | |
115 | shll %edx /* max result was 7fff... */ | |
116 | testl $0x80000000,%edx /* but min was 3fff... */ | |
117 | jnz sqrt_prelim_no_adjust | |
118 | ||
119 | movl $0x80000000,%edx /* round up */ | |
120 | ||
121 | sqrt_prelim_no_adjust: | |
122 | movl %edx,%esi /* Our first guess */ | |
123 | ||
124 | /* We have now computed (approx) (2 + x) / 3, which forms the basis | |
125 | for a few iterations of Newton's method */ | |
126 | ||
127 | movl FPU_fsqrt_arg_2,%ecx /* ms word */ | |
128 | ||
129 | /* | |
130 | * From our initial estimate, three iterations are enough to get us | |
131 | * to 30 bits or so. This will then allow two iterations at better | |
132 | * precision to complete the process. | |
133 | */ | |
134 | ||
135 | /* Compute (g + n/g)/2 at each iteration (g is the guess). */ | |
136 | shrl %ecx /* Doing this first will prevent a divide */ | |
137 | /* overflow later. */ | |
138 | ||
139 | movl %ecx,%edx /* msw of the arg / 2 */ | |
140 | divl %esi /* current estimate */ | |
141 | shrl %esi /* divide by 2 */ | |
142 | addl %eax,%esi /* the new estimate */ | |
143 | ||
144 | movl %ecx,%edx | |
145 | divl %esi | |
146 | shrl %esi | |
147 | addl %eax,%esi | |
148 | ||
149 | movl %ecx,%edx | |
150 | divl %esi | |
151 | shrl %esi | |
152 | addl %eax,%esi | |
153 | ||
154 | /* | |
155 | * Now that an estimate accurate to about 30 bits has been obtained (in %esi), | |
156 | * we improve it to 60 bits or so. | |
157 | * | |
158 | * The strategy from now on is to compute new estimates from | |
159 | * guess := guess + (n - guess^2) / (2 * guess) | |
160 | */ | |
161 | ||
162 | /* First, find the square of the guess */ | |
163 | movl %esi,%eax | |
164 | mull %esi | |
165 | /* guess^2 now in %edx:%eax */ | |
166 | ||
167 | movl FPU_fsqrt_arg_1,%ecx | |
168 | subl %ecx,%eax | |
169 | movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ | |
170 | sbbl %ecx,%edx | |
171 | jnc sqrt_stage_2_positive | |
172 | ||
173 | /* Subtraction gives a negative result, | |
174 | negate the result before division. */ | |
175 | notl %edx | |
176 | notl %eax | |
177 | addl $1,%eax | |
178 | adcl $0,%edx | |
179 | ||
180 | divl %esi | |
181 | movl %eax,%ecx | |
182 | ||
183 | movl %edx,%eax | |
184 | divl %esi | |
185 | jmp sqrt_stage_2_finish | |
186 | ||
187 | sqrt_stage_2_positive: | |
188 | divl %esi | |
189 | movl %eax,%ecx | |
190 | ||
191 | movl %edx,%eax | |
192 | divl %esi | |
193 | ||
194 | notl %ecx | |
195 | notl %eax | |
196 | addl $1,%eax | |
197 | adcl $0,%ecx | |
198 | ||
199 | sqrt_stage_2_finish: | |
200 | sarl $1,%ecx /* divide by 2 */ | |
201 | rcrl $1,%eax | |
202 | ||
203 | /* Form the new estimate in %esi:%edi */ | |
204 | movl %eax,%edi | |
205 | addl %ecx,%esi | |
206 | ||
207 | jnz sqrt_stage_2_done /* result should be [1..2) */ | |
208 | ||
209 | #ifdef PARANOID | |
210 | /* It should be possible to get here only if the arg is ffff....ffff */ | |
211 | cmp $0xffffffff,FPU_fsqrt_arg_1 | |
212 | jnz sqrt_stage_2_error | |
213 | #endif /* PARANOID */ | |
214 | ||
215 | /* The best rounded result. */ | |
216 | xorl %eax,%eax | |
217 | decl %eax | |
218 | movl %eax,%edi | |
219 | movl %eax,%esi | |
220 | movl $0x7fffffff,%eax | |
221 | jmp sqrt_round_result | |
222 | ||
223 | #ifdef PARANOID | |
224 | sqrt_stage_2_error: | |
225 | pushl EX_INTERNAL|0x213 | |
226 | call EXCEPTION | |
227 | #endif /* PARANOID */ | |
228 | ||
229 | sqrt_stage_2_done: | |
230 | ||
231 | /* Now the square root has been computed to better than 60 bits. */ | |
232 | ||
233 | /* Find the square of the guess. */ | |
234 | movl %edi,%eax /* ls word of guess */ | |
235 | mull %edi | |
236 | movl %edx,FPU_accum_1 | |
237 | ||
238 | movl %esi,%eax | |
239 | mull %esi | |
240 | movl %edx,FPU_accum_3 | |
241 | movl %eax,FPU_accum_2 | |
242 | ||
243 | movl %edi,%eax | |
244 | mull %esi | |
245 | addl %eax,FPU_accum_1 | |
246 | adcl %edx,FPU_accum_2 | |
247 | adcl $0,FPU_accum_3 | |
248 | ||
249 | /* movl %esi,%eax */ | |
250 | /* mull %edi */ | |
251 | addl %eax,FPU_accum_1 | |
252 | adcl %edx,FPU_accum_2 | |
253 | adcl $0,FPU_accum_3 | |
254 | ||
255 | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ | |
256 | ||
257 | movl FPU_fsqrt_arg_0,%eax /* get normalized n */ | |
258 | subl %eax,FPU_accum_1 | |
259 | movl FPU_fsqrt_arg_1,%eax | |
260 | sbbl %eax,FPU_accum_2 | |
261 | movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ | |
262 | sbbl %eax,FPU_accum_3 | |
263 | jnc sqrt_stage_3_positive | |
264 | ||
265 | /* Subtraction gives a negative result, | |
266 | negate the result before division */ | |
267 | notl FPU_accum_1 | |
268 | notl FPU_accum_2 | |
269 | notl FPU_accum_3 | |
270 | addl $1,FPU_accum_1 | |
271 | adcl $0,FPU_accum_2 | |
272 | ||
273 | #ifdef PARANOID | |
274 | adcl $0,FPU_accum_3 /* This must be zero */ | |
275 | jz sqrt_stage_3_no_error | |
276 | ||
277 | sqrt_stage_3_error: | |
278 | pushl EX_INTERNAL|0x207 | |
279 | call EXCEPTION | |
280 | ||
281 | sqrt_stage_3_no_error: | |
282 | #endif /* PARANOID */ | |
283 | ||
284 | movl FPU_accum_2,%edx | |
285 | movl FPU_accum_1,%eax | |
286 | divl %esi | |
287 | movl %eax,%ecx | |
288 | ||
289 | movl %edx,%eax | |
290 | divl %esi | |
291 | ||
292 | sarl $1,%ecx /* divide by 2 */ | |
293 | rcrl $1,%eax | |
294 | ||
295 | /* prepare to round the result */ | |
296 | ||
297 | addl %ecx,%edi | |
298 | adcl $0,%esi | |
299 | ||
300 | jmp sqrt_stage_3_finished | |
301 | ||
302 | sqrt_stage_3_positive: | |
303 | movl FPU_accum_2,%edx | |
304 | movl FPU_accum_1,%eax | |
305 | divl %esi | |
306 | movl %eax,%ecx | |
307 | ||
308 | movl %edx,%eax | |
309 | divl %esi | |
310 | ||
311 | sarl $1,%ecx /* divide by 2 */ | |
312 | rcrl $1,%eax | |
313 | ||
314 | /* prepare to round the result */ | |
315 | ||
316 | notl %eax /* Negate the correction term */ | |
317 | notl %ecx | |
318 | addl $1,%eax | |
319 | adcl $0,%ecx /* carry here ==> correction == 0 */ | |
320 | adcl $0xffffffff,%esi | |
321 | ||
322 | addl %ecx,%edi | |
323 | adcl $0,%esi | |
324 | ||
325 | sqrt_stage_3_finished: | |
326 | ||
327 | /* | |
328 | * The result in %esi:%edi:%esi should be good to about 90 bits here, | |
329 | * and the rounding information here does not have sufficient accuracy | |
330 | * in a few rare cases. | |
331 | */ | |
332 | cmpl $0xffffffe0,%eax | |
333 | ja sqrt_near_exact_x | |
334 | ||
335 | cmpl $0x00000020,%eax | |
336 | jb sqrt_near_exact | |
337 | ||
338 | cmpl $0x7fffffe0,%eax | |
339 | jb sqrt_round_result | |
340 | ||
341 | cmpl $0x80000020,%eax | |
342 | jb sqrt_get_more_precision | |
343 | ||
344 | sqrt_round_result: | |
345 | /* Set up for rounding operations */ | |
346 | movl %eax,%edx | |
347 | movl %esi,%eax | |
348 | movl %edi,%ebx | |
349 | movl PARAM1,%edi | |
350 | movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ | |
351 | jmp fpu_reg_round | |
352 | ||
353 | ||
354 | sqrt_near_exact_x: | |
355 | /* First, the estimate must be rounded up. */ | |
356 | addl $1,%edi | |
357 | adcl $0,%esi | |
358 | ||
359 | sqrt_near_exact: | |
360 | /* | |
361 | * This is an easy case because x^1/2 is monotonic. | |
362 | * We need just find the square of our estimate, compare it | |
363 | * with the argument, and deduce whether our estimate is | |
364 | * above, below, or exact. We use the fact that the estimate | |
365 | * is known to be accurate to about 90 bits. | |
366 | */ | |
367 | movl %edi,%eax /* ls word of guess */ | |
368 | mull %edi | |
369 | movl %edx,%ebx /* 2nd ls word of square */ | |
370 | movl %eax,%ecx /* ls word of square */ | |
371 | ||
372 | movl %edi,%eax | |
373 | mull %esi | |
374 | addl %eax,%ebx | |
375 | addl %eax,%ebx | |
376 | ||
377 | #ifdef PARANOID | |
378 | cmp $0xffffffb0,%ebx | |
379 | jb sqrt_near_exact_ok | |
380 | ||
381 | cmp $0x00000050,%ebx | |
382 | ja sqrt_near_exact_ok | |
383 | ||
384 | pushl EX_INTERNAL|0x214 | |
385 | call EXCEPTION | |
386 | ||
387 | sqrt_near_exact_ok: | |
388 | #endif /* PARANOID */ | |
389 | ||
390 | or %ebx,%ebx | |
391 | js sqrt_near_exact_small | |
392 | ||
393 | jnz sqrt_near_exact_large | |
394 | ||
395 | or %ebx,%edx | |
396 | jnz sqrt_near_exact_large | |
397 | ||
398 | /* Our estimate is exactly the right answer */ | |
399 | xorl %eax,%eax | |
400 | jmp sqrt_round_result | |
401 | ||
402 | sqrt_near_exact_small: | |
403 | /* Our estimate is too small */ | |
404 | movl $0x000000ff,%eax | |
405 | jmp sqrt_round_result | |
406 | ||
407 | sqrt_near_exact_large: | |
408 | /* Our estimate is too large, we need to decrement it */ | |
409 | subl $1,%edi | |
410 | sbbl $0,%esi | |
411 | movl $0xffffff00,%eax | |
412 | jmp sqrt_round_result | |
413 | ||
414 | ||
415 | sqrt_get_more_precision: | |
416 | /* This case is almost the same as the above, except we start | |
417 | with an extra bit of precision in the estimate. */ | |
418 | stc /* The extra bit. */ | |
419 | rcll $1,%edi /* Shift the estimate left one bit */ | |
420 | rcll $1,%esi | |
421 | ||
422 | movl %edi,%eax /* ls word of guess */ | |
423 | mull %edi | |
424 | movl %edx,%ebx /* 2nd ls word of square */ | |
425 | movl %eax,%ecx /* ls word of square */ | |
426 | ||
427 | movl %edi,%eax | |
428 | mull %esi | |
429 | addl %eax,%ebx | |
430 | addl %eax,%ebx | |
431 | ||
432 | /* Put our estimate back to its original value */ | |
433 | stc /* The ms bit. */ | |
434 | rcrl $1,%esi /* Shift the estimate left one bit */ | |
435 | rcrl $1,%edi | |
436 | ||
437 | #ifdef PARANOID | |
438 | cmp $0xffffff60,%ebx | |
439 | jb sqrt_more_prec_ok | |
440 | ||
441 | cmp $0x000000a0,%ebx | |
442 | ja sqrt_more_prec_ok | |
443 | ||
444 | pushl EX_INTERNAL|0x215 | |
445 | call EXCEPTION | |
446 | ||
447 | sqrt_more_prec_ok: | |
448 | #endif /* PARANOID */ | |
449 | ||
450 | or %ebx,%ebx | |
451 | js sqrt_more_prec_small | |
452 | ||
453 | jnz sqrt_more_prec_large | |
454 | ||
455 | or %ebx,%ecx | |
456 | jnz sqrt_more_prec_large | |
457 | ||
458 | /* Our estimate is exactly the right answer */ | |
459 | movl $0x80000000,%eax | |
460 | jmp sqrt_round_result | |
461 | ||
462 | sqrt_more_prec_small: | |
463 | /* Our estimate is too small */ | |
464 | movl $0x800000ff,%eax | |
465 | jmp sqrt_round_result | |
466 | ||
467 | sqrt_more_prec_large: | |
468 | /* Our estimate is too large */ | |
469 | movl $0x7fffff00,%eax | |
470 | jmp sqrt_round_result |