]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - arch/x86/math-emu/wm_sqrt.S
License cleanup: add SPDX GPL-2.0 license identifier to files with no license
[mirror_ubuntu-bionic-kernel.git] / arch / x86 / math-emu / wm_sqrt.S
1 /* SPDX-License-Identifier: GPL-2.0 */
2 .file "wm_sqrt.S"
3 /*---------------------------------------------------------------------------+
4 | wm_sqrt.S |
5 | |
6 | Fixed point arithmetic square root evaluation. |
7 | |
8 | Copyright (C) 1992,1993,1995,1997 |
9 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
10 | Australia. E-mail billm@suburbia.net |
11 | |
12 | Call from C as: |
13 | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
14 | |
15 +---------------------------------------------------------------------------*/
16
17 /*---------------------------------------------------------------------------+
18 | wm_sqrt(FPU_REG *n, unsigned int control_word) |
19 | returns the square root of n in n. |
20 | |
21 | Use Newton's method to compute the square root of a number, which must |
22 | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
23 | Does not check the sign or tag of the argument. |
24 | Sets the exponent, but not the sign or tag of the result. |
25 | |
26 | The guess is kept in %esi:%edi |
27 +---------------------------------------------------------------------------*/
28
29 #include "exception.h"
30 #include "fpu_emu.h"
31
32
33 #ifndef NON_REENTRANT_FPU
34 /* Local storage on the stack: */
35 #define FPU_accum_3 -4(%ebp) /* ms word */
36 #define FPU_accum_2 -8(%ebp)
37 #define FPU_accum_1 -12(%ebp)
38 #define FPU_accum_0 -16(%ebp)
39
40 /*
41 * The de-normalised argument:
42 * sq_2 sq_1 sq_0
43 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
44 * ^ binary point here
45 */
46 #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
47 #define FPU_fsqrt_arg_1 -24(%ebp)
48 #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
49
50 #else
51 /* Local storage in a static area: */
52 .data
53 .align 4,0
54 FPU_accum_3:
55 .long 0 /* ms word */
56 FPU_accum_2:
57 .long 0
58 FPU_accum_1:
59 .long 0
60 FPU_accum_0:
61 .long 0
62
63 /* The de-normalised argument:
64 sq_2 sq_1 sq_0
65 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
66 ^ binary point here
67 */
68 FPU_fsqrt_arg_2:
69 .long 0 /* ms word */
70 FPU_fsqrt_arg_1:
71 .long 0
72 FPU_fsqrt_arg_0:
73 .long 0 /* ls word, at most the ms bit is set */
74 #endif /* NON_REENTRANT_FPU */
75
76
77 .text
78 ENTRY(wm_sqrt)
79 pushl %ebp
80 movl %esp,%ebp
81 #ifndef NON_REENTRANT_FPU
82 subl $28,%esp
83 #endif /* NON_REENTRANT_FPU */
84 pushl %esi
85 pushl %edi
86 pushl %ebx
87
88 movl PARAM1,%esi
89
90 movl SIGH(%esi),%eax
91 movl SIGL(%esi),%ecx
92 xorl %edx,%edx
93
94 /* We use a rough linear estimate for the first guess.. */
95
96 cmpw EXP_BIAS,EXP(%esi)
97 jnz sqrt_arg_ge_2
98
99 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
100 rcrl $1,%ecx
101 rcrl $1,%edx
102
103 sqrt_arg_ge_2:
104 /* From here on, n is never accessed directly again until it is
105 replaced by the answer. */
106
107 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
108 movl %ecx,FPU_fsqrt_arg_1
109 movl %edx,FPU_fsqrt_arg_0
110
111 /* Make a linear first estimate */
112 shrl $1,%eax
113 addl $0x40000000,%eax
114 movl $0xaaaaaaaa,%ecx
115 mull %ecx
116 shll %edx /* max result was 7fff... */
117 testl $0x80000000,%edx /* but min was 3fff... */
118 jnz sqrt_prelim_no_adjust
119
120 movl $0x80000000,%edx /* round up */
121
122 sqrt_prelim_no_adjust:
123 movl %edx,%esi /* Our first guess */
124
125 /* We have now computed (approx) (2 + x) / 3, which forms the basis
126 for a few iterations of Newton's method */
127
128 movl FPU_fsqrt_arg_2,%ecx /* ms word */
129
130 /*
131 * From our initial estimate, three iterations are enough to get us
132 * to 30 bits or so. This will then allow two iterations at better
133 * precision to complete the process.
134 */
135
136 /* Compute (g + n/g)/2 at each iteration (g is the guess). */
137 shrl %ecx /* Doing this first will prevent a divide */
138 /* overflow later. */
139
140 movl %ecx,%edx /* msw of the arg / 2 */
141 divl %esi /* current estimate */
142 shrl %esi /* divide by 2 */
143 addl %eax,%esi /* the new estimate */
144
145 movl %ecx,%edx
146 divl %esi
147 shrl %esi
148 addl %eax,%esi
149
150 movl %ecx,%edx
151 divl %esi
152 shrl %esi
153 addl %eax,%esi
154
155 /*
156 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
157 * we improve it to 60 bits or so.
158 *
159 * The strategy from now on is to compute new estimates from
160 * guess := guess + (n - guess^2) / (2 * guess)
161 */
162
163 /* First, find the square of the guess */
164 movl %esi,%eax
165 mull %esi
166 /* guess^2 now in %edx:%eax */
167
168 movl FPU_fsqrt_arg_1,%ecx
169 subl %ecx,%eax
170 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
171 sbbl %ecx,%edx
172 jnc sqrt_stage_2_positive
173
174 /* Subtraction gives a negative result,
175 negate the result before division. */
176 notl %edx
177 notl %eax
178 addl $1,%eax
179 adcl $0,%edx
180
181 divl %esi
182 movl %eax,%ecx
183
184 movl %edx,%eax
185 divl %esi
186 jmp sqrt_stage_2_finish
187
188 sqrt_stage_2_positive:
189 divl %esi
190 movl %eax,%ecx
191
192 movl %edx,%eax
193 divl %esi
194
195 notl %ecx
196 notl %eax
197 addl $1,%eax
198 adcl $0,%ecx
199
200 sqrt_stage_2_finish:
201 sarl $1,%ecx /* divide by 2 */
202 rcrl $1,%eax
203
204 /* Form the new estimate in %esi:%edi */
205 movl %eax,%edi
206 addl %ecx,%esi
207
208 jnz sqrt_stage_2_done /* result should be [1..2) */
209
210 #ifdef PARANOID
211 /* It should be possible to get here only if the arg is ffff....ffff */
212 cmp $0xffffffff,FPU_fsqrt_arg_1
213 jnz sqrt_stage_2_error
214 #endif /* PARANOID */
215
216 /* The best rounded result. */
217 xorl %eax,%eax
218 decl %eax
219 movl %eax,%edi
220 movl %eax,%esi
221 movl $0x7fffffff,%eax
222 jmp sqrt_round_result
223
224 #ifdef PARANOID
225 sqrt_stage_2_error:
226 pushl EX_INTERNAL|0x213
227 call EXCEPTION
228 #endif /* PARANOID */
229
230 sqrt_stage_2_done:
231
232 /* Now the square root has been computed to better than 60 bits. */
233
234 /* Find the square of the guess. */
235 movl %edi,%eax /* ls word of guess */
236 mull %edi
237 movl %edx,FPU_accum_1
238
239 movl %esi,%eax
240 mull %esi
241 movl %edx,FPU_accum_3
242 movl %eax,FPU_accum_2
243
244 movl %edi,%eax
245 mull %esi
246 addl %eax,FPU_accum_1
247 adcl %edx,FPU_accum_2
248 adcl $0,FPU_accum_3
249
250 /* movl %esi,%eax */
251 /* mull %edi */
252 addl %eax,FPU_accum_1
253 adcl %edx,FPU_accum_2
254 adcl $0,FPU_accum_3
255
256 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
257
258 movl FPU_fsqrt_arg_0,%eax /* get normalized n */
259 subl %eax,FPU_accum_1
260 movl FPU_fsqrt_arg_1,%eax
261 sbbl %eax,FPU_accum_2
262 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
263 sbbl %eax,FPU_accum_3
264 jnc sqrt_stage_3_positive
265
266 /* Subtraction gives a negative result,
267 negate the result before division */
268 notl FPU_accum_1
269 notl FPU_accum_2
270 notl FPU_accum_3
271 addl $1,FPU_accum_1
272 adcl $0,FPU_accum_2
273
274 #ifdef PARANOID
275 adcl $0,FPU_accum_3 /* This must be zero */
276 jz sqrt_stage_3_no_error
277
278 sqrt_stage_3_error:
279 pushl EX_INTERNAL|0x207
280 call EXCEPTION
281
282 sqrt_stage_3_no_error:
283 #endif /* PARANOID */
284
285 movl FPU_accum_2,%edx
286 movl FPU_accum_1,%eax
287 divl %esi
288 movl %eax,%ecx
289
290 movl %edx,%eax
291 divl %esi
292
293 sarl $1,%ecx /* divide by 2 */
294 rcrl $1,%eax
295
296 /* prepare to round the result */
297
298 addl %ecx,%edi
299 adcl $0,%esi
300
301 jmp sqrt_stage_3_finished
302
303 sqrt_stage_3_positive:
304 movl FPU_accum_2,%edx
305 movl FPU_accum_1,%eax
306 divl %esi
307 movl %eax,%ecx
308
309 movl %edx,%eax
310 divl %esi
311
312 sarl $1,%ecx /* divide by 2 */
313 rcrl $1,%eax
314
315 /* prepare to round the result */
316
317 notl %eax /* Negate the correction term */
318 notl %ecx
319 addl $1,%eax
320 adcl $0,%ecx /* carry here ==> correction == 0 */
321 adcl $0xffffffff,%esi
322
323 addl %ecx,%edi
324 adcl $0,%esi
325
326 sqrt_stage_3_finished:
327
328 /*
329 * The result in %esi:%edi:%esi should be good to about 90 bits here,
330 * and the rounding information here does not have sufficient accuracy
331 * in a few rare cases.
332 */
333 cmpl $0xffffffe0,%eax
334 ja sqrt_near_exact_x
335
336 cmpl $0x00000020,%eax
337 jb sqrt_near_exact
338
339 cmpl $0x7fffffe0,%eax
340 jb sqrt_round_result
341
342 cmpl $0x80000020,%eax
343 jb sqrt_get_more_precision
344
345 sqrt_round_result:
346 /* Set up for rounding operations */
347 movl %eax,%edx
348 movl %esi,%eax
349 movl %edi,%ebx
350 movl PARAM1,%edi
351 movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
352 jmp fpu_reg_round
353
354
355 sqrt_near_exact_x:
356 /* First, the estimate must be rounded up. */
357 addl $1,%edi
358 adcl $0,%esi
359
360 sqrt_near_exact:
361 /*
362 * This is an easy case because x^1/2 is monotonic.
363 * We need just find the square of our estimate, compare it
364 * with the argument, and deduce whether our estimate is
365 * above, below, or exact. We use the fact that the estimate
366 * is known to be accurate to about 90 bits.
367 */
368 movl %edi,%eax /* ls word of guess */
369 mull %edi
370 movl %edx,%ebx /* 2nd ls word of square */
371 movl %eax,%ecx /* ls word of square */
372
373 movl %edi,%eax
374 mull %esi
375 addl %eax,%ebx
376 addl %eax,%ebx
377
378 #ifdef PARANOID
379 cmp $0xffffffb0,%ebx
380 jb sqrt_near_exact_ok
381
382 cmp $0x00000050,%ebx
383 ja sqrt_near_exact_ok
384
385 pushl EX_INTERNAL|0x214
386 call EXCEPTION
387
388 sqrt_near_exact_ok:
389 #endif /* PARANOID */
390
391 or %ebx,%ebx
392 js sqrt_near_exact_small
393
394 jnz sqrt_near_exact_large
395
396 or %ebx,%edx
397 jnz sqrt_near_exact_large
398
399 /* Our estimate is exactly the right answer */
400 xorl %eax,%eax
401 jmp sqrt_round_result
402
403 sqrt_near_exact_small:
404 /* Our estimate is too small */
405 movl $0x000000ff,%eax
406 jmp sqrt_round_result
407
408 sqrt_near_exact_large:
409 /* Our estimate is too large, we need to decrement it */
410 subl $1,%edi
411 sbbl $0,%esi
412 movl $0xffffff00,%eax
413 jmp sqrt_round_result
414
415
416 sqrt_get_more_precision:
417 /* This case is almost the same as the above, except we start
418 with an extra bit of precision in the estimate. */
419 stc /* The extra bit. */
420 rcll $1,%edi /* Shift the estimate left one bit */
421 rcll $1,%esi
422
423 movl %edi,%eax /* ls word of guess */
424 mull %edi
425 movl %edx,%ebx /* 2nd ls word of square */
426 movl %eax,%ecx /* ls word of square */
427
428 movl %edi,%eax
429 mull %esi
430 addl %eax,%ebx
431 addl %eax,%ebx
432
433 /* Put our estimate back to its original value */
434 stc /* The ms bit. */
435 rcrl $1,%esi /* Shift the estimate left one bit */
436 rcrl $1,%edi
437
438 #ifdef PARANOID
439 cmp $0xffffff60,%ebx
440 jb sqrt_more_prec_ok
441
442 cmp $0x000000a0,%ebx
443 ja sqrt_more_prec_ok
444
445 pushl EX_INTERNAL|0x215
446 call EXCEPTION
447
448 sqrt_more_prec_ok:
449 #endif /* PARANOID */
450
451 or %ebx,%ebx
452 js sqrt_more_prec_small
453
454 jnz sqrt_more_prec_large
455
456 or %ebx,%ecx
457 jnz sqrt_more_prec_large
458
459 /* Our estimate is exactly the right answer */
460 movl $0x80000000,%eax
461 jmp sqrt_round_result
462
463 sqrt_more_prec_small:
464 /* Our estimate is too small */
465 movl $0x800000ff,%eax
466 jmp sqrt_round_result
467
468 sqrt_more_prec_large:
469 /* Our estimate is too large */
470 movl $0x7fffff00,%eax
471 jmp sqrt_round_result
472 ENDPROC(wm_sqrt)