[mirror_ubuntu-focal-kernel.git] / arch / i386 / math-emu / wm_sqrt.S

	.file	"wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 | Call from C as:                                                           |
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"


#ifndef NON_REENTRANT_FPU
/*
 * Local storage on the stack, addressed relative to the frame pointer.
 * The seven 32-bit slots below span -4(%ebp) .. -28(%ebp), i.e. exactly
 * the 28 bytes that wm_sqrt reserves with "subl $28,%esp".
 *
 * FPU_accum_3..FPU_accum_1 receive the square of the current guess.
 * FPU_accum_0 is defined here but is not referenced by any instruction
 * in this file.
 */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 *
 * (sq_2, sq_1, sq_0 in the picture are FPU_fsqrt_arg_2, _1, _0 below.)
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

#else
/*
 * Local storage in a static area: the same seven words, but as single
 * zero-initialised longs in .data that every call shares.  In this
 * configuration wm_sqrt reserves no stack space for them.
 */
.data
	.align 4,0
FPU_accum_3:
	.long	0		/* ms word */
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0

/* The de-normalised argument:
                    sq_2                  sq_1              sq_0
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
             ^ binary point here
 */
FPU_fsqrt_arg_2:
	.long	0		/* ms word */
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0		/* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */ 


.text
/*
 * wm_sqrt: entry, argument de-normalisation, first estimate and the
 * three 32-bit Newton iterations.
 *
 * Register use in this part:
 *   %esi          pointer to the argument (PARAM1) on entry, then the
 *                 32-bit guess from sqrt_prelim_no_adjust onwards
 *   %eax:%ecx:%edx  the significand being de-normalised (ms word first)
 *   %ecx          later: ms word of the argument, halved
 *
 * There is no epilogue in this file: every exit path ends in
 * "jmp fpu_reg_round".  Presumably that routine restores %ebx/%edi/%esi
 * and the frame and returns to the C caller -- confirm in its source.
 */
ENTRY(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	/* Room for FPU_accum_0..3 and FPU_fsqrt_arg_0..2 (7 * 4 bytes). */
	subl	$28,%esp
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi

	/* Fetch the 64-bit significand; %edx is a third, initially empty,
	   word that catches the bit shifted out below. */
	movl	SIGH(%esi),%eax
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx

/* We use a rough linear estimate for the first guess.. */

	/* Exponent == EXP_BIAS: shift the significand right one bit.
	   Any other exponent skips the shift. */
	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx	/* approx. (2/3) * 2^32 */
	mull	%ecx			/* %edx = high word of the product */
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	/* Doubling left the ms bit clear: force the guess to 0x80000000. */
	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi	/* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
	shrl	%ecx		/* Doing this first will prevent a divide */
				/* overflow later. */

	/*
	 * Each step divides %edx:%eax by the guess.  Only the high word
	 * (%edx) is reloaded; the low word (%eax) is whatever the previous
	 * instruction left there (the low product word the first time, the
	 * previous quotient afterwards).  Because the high word is already
	 * msw(n)/2, the quotient is added directly to g/2.
	 */
	movl	%ecx,%edx	/* msw of the arg / 2 */
	divl	%esi		/* current estimate */
	shrl	%esi		/* divide by 2 */
	addl	%eax,%esi	/* the new estimate */

	/* Second iteration, same pattern. */
	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	/* Third iteration, same pattern. */
	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 *
 * On exit from this stage the guess is the 64-bit value %esi:%edi.
 */

/* First, find the square of the guess */
	movl	%esi,%eax
	mull	%esi
/* guess^2 now in %edx:%eax */

	/* %edx:%eax = guess^2 - (arg_2:arg_1).  The movl between subl and
	   sbbl leaves the borrow in CF untouched. */
	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive	/* no borrow: guess^2 >= n */

/* Subtraction gives a negative result,
   negate the result before division. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	/* Two chained divides give a 64-bit quotient %ecx:%eax.  Note that
	   the second dividend is remainder:remainder -- %edx still holds
	   the remainder and it is also copied into %eax. */
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	/* Same two-step divide as above ... */
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	/* ... then negate the quotient %ecx:%eax, because here the guess
	   is too large and the correction must be subtracted. */
	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	/* Signed 64-bit halving of the correction %ecx:%eax. */
	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi

	/* A zero ms word means the addition wrapped. */
	jnz	sqrt_stage_2_done	/* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
	/* NOTE(review): no operand-size suffix on a memory/immediate
	   compare; this relies on the assembler's default size. */
	cmp	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
	/* %esi:%edi = ffffffff:ffffffff, rounding word %eax = 7fffffff. */
	xorl	%eax,%eax
	decl	%eax
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	/* NOTE(review): after EXCEPTION returns, execution falls through
	   into the next stage with the pushed argument still on the stack. */
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif /* PARANOID */ 

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/*
 * Stage 3: one more correction step using the 64-bit guess %esi:%edi.
 * The square is accumulated in FPU_accum_3:FPU_accum_2:FPU_accum_1; the
 * lowest word of the full 128-bit square (low half of %edi * %edi) is
 * discarded.  On exit %esi:%edi is the corrected guess and %eax holds
 * the low word of the halved correction.
 */

/* Find the square of the guess. */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1	/* keep only the high half */

	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3
	movl	%eax,FPU_accum_2

	/* Cross term %edi * %esi, needed twice in the square. */
	movl	%edi,%eax
	mull	%esi
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

	/* The second copy reuses %edx:%eax from the mull above, which is
	   why the two instructions below stay commented out. */
/*	movl	%esi,%eax */
/*	mull	%edi */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	/* accum -= n, as a 96-bit subtract; the movl's leave CF intact. */
	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive	/* no borrow: guess^2 >= n */

/* Subtraction gives a negative result,
   negate the result before division */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

	/* Without PARANOID the carry is not propagated into FPU_accum_3;
	   only FPU_accum_2:FPU_accum_1 is used from here on. */
#ifdef PARANOID
	adcl	$0,FPU_accum_3	/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	/* NOTE(review): falls through after EXCEPTION returns, with the
	   pushed argument still on the stack. */
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	/* Two chained divides by the ms word of the guess give the
	   quotient %ecx:%eax (second dividend is remainder:remainder). */
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	/* Guess was too small: add the correction's high word to the
	   64-bit guess; %eax keeps the bits below it. */
	addl	%ecx,%edi
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	/* Same divide sequence as on the negative path. */
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	notl	%eax		/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
	/* Adds ffffffff plus that carry: %esi is decremented unless the
	   correction was zero, acting as the sign extension of %ecx. */
	adcl	$0xffffffff,%esi

	addl	%ecx,%edi
	adcl	$0,%esi
sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 *
 * %eax is the word below the 64-bit result.  All compares are unsigned:
 *   %eax >  ffffffe0              -> sqrt_near_exact_x
 *   %eax <  00000020              -> sqrt_near_exact
 *   00000020 <= %eax < 7fffffe0   -> sqrt_round_result (use %eax as is)
 *   7fffffe0 <= %eax < 80000020   -> sqrt_get_more_precision
 *   anything else                 -> falls into sqrt_round_result
 */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
	/*
	 * Hand-off to fpu_reg_round (defined outside this file):
	 *   %edx      = rounding word (was %eax)
	 *   %eax:%ebx = 64-bit result (was %esi:%edi)
	 *   %edi      = PARAM1, the destination register
	 * The exact contract of fpu_reg_round should be confirmed in its
	 * own source.
	 */
	movl	%eax,%edx
	movl	%esi,%eax
	movl	%edi,%ebx
	movl	PARAM1,%edi
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
	jmp	fpu_reg_round


sqrt_near_exact_x:
/*
 * Reached when the rounding word in %eax is just below 2^32, i.e. the
 * true root is only slightly less than (estimate + 1 ulp).
 */
/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 *
 * Only the two least significant words of the 128-bit square are
 * formed:
 *   %ecx = ls word      (low half of %edi * %edi)
 *   %ebx = 2nd ls word  (high half of %edi * %edi
 *                        + 2 * low half of %edi * %esi, mod 2^32)
 * %ebx is then read as a small signed number.
 *
 * On exit to sqrt_round_result, %esi:%edi is the estimate and %eax is
 * a synthetic rounding word:
 *   00000000  estimate is exact
 *   000000ff  estimate is slightly too small
 *   ffffff00  estimate was too large and has been decremented
 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* cross term counts twice */
	addl	%eax,%ebx

#ifdef PARANOID
	/* NOTE(review): as written, every value of %ebx reaches
	   sqrt_near_exact_ok (below ffffffb0 takes the first branch,
	   anything else is above 00000050), so this check can never
	   fail.  Left unchanged; confirm the intended bounds. */
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	/* NOTE(review): falls through after EXCEPTION returns, with the
	   pushed argument still on the stack. */
	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */ 

	or	%ebx,%ebx
	js	sqrt_near_exact_small	/* 2nd ls word negative */

	jnz	sqrt_near_exact_large	/* 2nd ls word positive */

	/*
	 * The 2nd ls word is zero, so the ls word of the square decides.
	 * That word was saved in %ecx above.  %edx must not be used here:
	 * after the second mull it holds the high half of %edi * %esi,
	 * which says nothing about whether the square is exact.  Since
	 * %ebx is zero, the "or" simply tests %ecx.
	 */
	or	%ebx,%ecx
	jnz	sqrt_near_exact_large

/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result
	
sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
/*
 * Reached when the rounding word in %eax is within 0x20 of 80000000.
 * The estimate is temporarily replaced by 2*estimate + 1 (the ms bit of
 * %esi is shifted out), squared in the same two-word fashion as in
 * sqrt_near_exact, and then restored.
 *
 * On exit to sqrt_round_result, %esi:%edi is the unchanged estimate and
 * %eax is a synthetic rounding word:
 *   80000000  the extended estimate is exact
 *   800000ff  the extended estimate is slightly too small
 *   7fffff00  the extended estimate is slightly too large
 */
	stc			/* The extra bit. */
	rcll	$1,%edi		/* Shift the estimate left one bit */
	rcll	$1,%esi

	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* cross term counts twice */
	addl	%eax,%ebx

/* Put our estimate back to its original value */
	stc			/* The ms bit. */
	rcrl	$1,%esi		/* Shift the estimate right one bit */
	rcrl	$1,%edi

#ifdef PARANOID
	/* NOTE(review): as written, every value of %ebx reaches
	   sqrt_more_prec_ok (below ffffff60 takes the first branch,
	   anything else is above 000000a0), so this check can never
	   fail.  Confirm the intended bounds. */
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	/* NOTE(review): falls through after EXCEPTION returns, with the
	   pushed argument still on the stack. */
	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */ 

	or	%ebx,%ebx
	js	sqrt_more_prec_small	/* 2nd ls word negative */

	jnz	sqrt_more_prec_large	/* 2nd ls word positive */

	/* %ebx is zero here, so this tests the ls word of the square. */
	or	%ebx,%ecx
	jnz	sqrt_more_prec_large

/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result
	
sqrt_more_prec_large:
/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
Commit	Line	Data
1da177e4 LT	1	.file "wm_sqrt.S"
	2	/*---------------------------------------------------------------------------+
	3	\| wm_sqrt.S \|
	4	\| \|
	5	\| Fixed point arithmetic square root evaluation. \|
	6	\| \|
	7	\| Copyright (C) 1992,1993,1995,1997 \|
	8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	9	\| Australia. E-mail billm@suburbia.net \|
	10	\| \|
	11	\| Call from C as: \|
	12	\| int wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	13	\| \|
	14	+---------------------------------------------------------------------------*/
	15
	16	/*---------------------------------------------------------------------------+
	17	\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	18	\| returns the square root of n in n. \|
	19	\| \|
	20	\| Use Newton's method to compute the square root of a number, which must \|
	21	\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|
	22	\| Does not check the sign or tag of the argument. \|
	23	\| Sets the exponent, but not the sign or tag of the result. \|
	24	\| \|
	25	\| The guess is kept in %esi:%edi \|
	26	+---------------------------------------------------------------------------*/
	27
	28	#include "exception.h"
	29	#include "fpu_emu.h"
	30
	31
	32	#ifndef NON_REENTRANT_FPU
	33	/* Local storage on the stack: */
	34	#define FPU_accum_3 -4(%ebp) /* ms word */
	35	#define FPU_accum_2 -8(%ebp)
	36	#define FPU_accum_1 -12(%ebp)
	37	#define FPU_accum_0 -16(%ebp)
	38
	39	/*
	40	* The de-normalised argument:
	41	* sq_2 sq_1 sq_0
	42	* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
	43	* ^ binary point here
	44	*/
	45	#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
	46	#define FPU_fsqrt_arg_1 -24(%ebp)
	47	#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
	48
	49	#else
	50	/* Local storage in a static area: */
	51	.data
	52	.align 4,0
	53	FPU_accum_3:
	54	.long 0 /* ms word */
	55	FPU_accum_2:
	56	.long 0
	57	FPU_accum_1:
	58	.long 0
	59	FPU_accum_0:
	60	.long 0
	61
	62	/* The de-normalised argument:
	63	sq_2 sq_1 sq_0
	64	b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
65	^ binary point here
66	*/
67	FPU_fsqrt_arg_2:
68	.long 0 /* ms word */
69	FPU_fsqrt_arg_1:
70	.long 0
71	FPU_fsqrt_arg_0:
72	.long 0 /* ls word, at most the ms bit is set */
73	#endif /* NON_REENTRANT_FPU */
74
75
76	.text
77	ENTRY(wm_sqrt)
78	pushl %ebp
79	movl %esp,%ebp
80	#ifndef NON_REENTRANT_FPU
81	subl $28,%esp
82	#endif /* NON_REENTRANT_FPU */
83	pushl %esi
84	pushl %edi
85	pushl %ebx
86
87	movl PARAM1,%esi
88
89	movl SIGH(%esi),%eax
90	movl SIGL(%esi),%ecx
91	xorl %edx,%edx
92
93	/* We use a rough linear estimate for the first guess.. */
94
95	cmpw EXP_BIAS,EXP(%esi)
96	jnz sqrt_arg_ge_2
97
98	shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
99	rcrl $1,%ecx
100	rcrl $1,%edx
101
102	sqrt_arg_ge_2:
103	/* From here on, n is never accessed directly again until it is
104	replaced by the answer. */
105
106	movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
107	movl %ecx,FPU_fsqrt_arg_1
108	movl %edx,FPU_fsqrt_arg_0
109
110	/* Make a linear first estimate */
111	shrl $1,%eax
112	addl $0x40000000,%eax
113	movl $0xaaaaaaaa,%ecx
114	mull %ecx
115	shll %edx /* max result was 7fff... */
116	testl $0x80000000,%edx /* but min was 3fff... */
117	jnz sqrt_prelim_no_adjust
118
119	movl $0x80000000,%edx /* round up */
120
121	sqrt_prelim_no_adjust:
122	movl %edx,%esi /* Our first guess */
123
124	/* We have now computed (approx) (2 + x) / 3, which forms the basis
125	for a few iterations of Newton's method */
126
127	movl FPU_fsqrt_arg_2,%ecx /* ms word */
128
129	/*
130	* From our initial estimate, three iterations are enough to get us
131	* to 30 bits or so. This will then allow two iterations at better
132	* precision to complete the process.
133	*/
134
135	/* Compute (g + n/g)/2 at each iteration (g is the guess). */
136	shrl %ecx /* Doing this first will prevent a divide */
137	/* overflow later. */
138
139	movl %ecx,%edx /* msw of the arg / 2 */
140	divl %esi /* current estimate */
141	shrl %esi /* divide by 2 */
142	addl %eax,%esi /* the new estimate */
143
144	movl %ecx,%edx
145	divl %esi
146	shrl %esi
147	addl %eax,%esi
148
149	movl %ecx,%edx
150	divl %esi
151	shrl %esi
152	addl %eax,%esi
153
154	/*
155	* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156	* we improve it to 60 bits or so.
157	*
158	* The strategy from now on is to compute new estimates from
159	* guess := guess + (n - guess^2) / (2 * guess)
160	*/
161
162	/* First, find the square of the guess */
163	movl %esi,%eax
164	mull %esi
165	/* guess^2 now in %edx:%eax */
166
167	movl FPU_fsqrt_arg_1,%ecx
168	subl %ecx,%eax
169	movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
170	sbbl %ecx,%edx
171	jnc sqrt_stage_2_positive
172
173	/* Subtraction gives a negative result,
174	negate the result before division. */
175	notl %edx
176	notl %eax
177	addl $1,%eax
178	adcl $0,%edx
179
180	divl %esi
181	movl %eax,%ecx
182
183	movl %edx,%eax
184	divl %esi
185	jmp sqrt_stage_2_finish
186
187	sqrt_stage_2_positive:
188	divl %esi
189	movl %eax,%ecx
190
191	movl %edx,%eax
192	divl %esi
193
194	notl %ecx
195	notl %eax
196	addl $1,%eax
197	adcl $0,%ecx
198
199	sqrt_stage_2_finish:
200	sarl $1,%ecx /* divide by 2 */
201	rcrl $1,%eax
202
203	/* Form the new estimate in %esi:%edi */
204	movl %eax,%edi
205	addl %ecx,%esi
206
207	jnz sqrt_stage_2_done /* result should be [1..2) */
208
209	#ifdef PARANOID
210	/* It should be possible to get here only if the arg is ffff....ffff */
211	cmp $0xffffffff,FPU_fsqrt_arg_1
212	jnz sqrt_stage_2_error
213	#endif /* PARANOID */
214
215	/* The best rounded result. */
216	xorl %eax,%eax
217	decl %eax
218	movl %eax,%edi
219	movl %eax,%esi
220	movl $0x7fffffff,%eax
221	jmp sqrt_round_result
222
223	#ifdef PARANOID
224	sqrt_stage_2_error:
225	pushl EX_INTERNAL\|0x213
226	call EXCEPTION
227	#endif /* PARANOID */
228
229	sqrt_stage_2_done:
230
231	/* Now the square root has been computed to better than 60 bits. */
232
233	/* Find the square of the guess. */
234	movl %edi,%eax /* ls word of guess */
235	mull %edi
236	movl %edx,FPU_accum_1
237
238	movl %esi,%eax
239	mull %esi
240	movl %edx,FPU_accum_3
241	movl %eax,FPU_accum_2
242
243	movl %edi,%eax
244	mull %esi
245	addl %eax,FPU_accum_1
246	adcl %edx,FPU_accum_2
247	adcl $0,FPU_accum_3
248
249	/* movl %esi,%eax */
250	/* mull %edi */
251	addl %eax,FPU_accum_1
252	adcl %edx,FPU_accum_2
253	adcl $0,FPU_accum_3
254
255	/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
257	movl FPU_fsqrt_arg_0,%eax /* get normalized n */
258	subl %eax,FPU_accum_1
259	movl FPU_fsqrt_arg_1,%eax
260	sbbl %eax,FPU_accum_2
261	movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
262	sbbl %eax,FPU_accum_3
263	jnc sqrt_stage_3_positive
264
265	/* Subtraction gives a negative result,
266	negate the result before division */
267	notl FPU_accum_1
268	notl FPU_accum_2
269	notl FPU_accum_3
270	addl $1,FPU_accum_1
271	adcl $0,FPU_accum_2
272
273	#ifdef PARANOID
274	adcl $0,FPU_accum_3 /* This must be zero */
275	jz sqrt_stage_3_no_error
276
277	sqrt_stage_3_error:
278	pushl EX_INTERNAL\|0x207
279	call EXCEPTION
280
281	sqrt_stage_3_no_error:
282	#endif /* PARANOID */
283
284	movl FPU_accum_2,%edx
285	movl FPU_accum_1,%eax
286	divl %esi
287	movl %eax,%ecx
288
289	movl %edx,%eax
290	divl %esi
291
292	sarl $1,%ecx /* divide by 2 */
293	rcrl $1,%eax
294
295	/* prepare to round the result */
296
297	addl %ecx,%edi
298	adcl $0,%esi
299
300	jmp sqrt_stage_3_finished
301
302	sqrt_stage_3_positive:
303	movl FPU_accum_2,%edx
304	movl FPU_accum_1,%eax
305	divl %esi
306	movl %eax,%ecx
307
308	movl %edx,%eax
309	divl %esi
310
311	sarl $1,%ecx /* divide by 2 */
312	rcrl $1,%eax
313
314	/* prepare to round the result */
315
316	notl %eax /* Negate the correction term */
317	notl %ecx
318	addl $1,%eax
319	adcl $0,%ecx /* carry here ==> correction == 0 */
320	adcl $0xffffffff,%esi
321
322	addl %ecx,%edi
323	adcl $0,%esi
324
325	sqrt_stage_3_finished:
326
327	/*
328	* The result in %esi:%edi:%esi should be good to about 90 bits here,
329	* and the rounding information here does not have sufficient accuracy
330	* in a few rare cases.
331	*/
332	cmpl $0xffffffe0,%eax
333	ja sqrt_near_exact_x
334
335	cmpl $0x00000020,%eax
336	jb sqrt_near_exact
337
338	cmpl $0x7fffffe0,%eax
339	jb sqrt_round_result
340
341	cmpl $0x80000020,%eax
342	jb sqrt_get_more_precision
343
344	sqrt_round_result:
345	/* Set up for rounding operations */
346	movl %eax,%edx
347	movl %esi,%eax
348	movl %edi,%ebx
349	movl PARAM1,%edi
350	movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
351	jmp fpu_reg_round
352
353
354	sqrt_near_exact_x:
355	/* First, the estimate must be rounded up. */
356	addl $1,%edi
357	adcl $0,%esi
358
359	sqrt_near_exact:
360	/*
361	* This is an easy case because x^1/2 is monotonic.
362	* We need just find the square of our estimate, compare it
363	* with the argument, and deduce whether our estimate is
364	* above, below, or exact. We use the fact that the estimate
365	* is known to be accurate to about 90 bits.
366	*/
367	movl %edi,%eax /* ls word of guess */
368	mull %edi
369	movl %edx,%ebx /* 2nd ls word of square */
370	movl %eax,%ecx /* ls word of square */
371
372	movl %edi,%eax
373	mull %esi
374	addl %eax,%ebx
375	addl %eax,%ebx
376
377	#ifdef PARANOID
378	cmp $0xffffffb0,%ebx
379	jb sqrt_near_exact_ok
380
381	cmp $0x00000050,%ebx
382	ja sqrt_near_exact_ok
383
384	pushl EX_INTERNAL\|0x214
385	call EXCEPTION
386
387	sqrt_near_exact_ok:
388	#endif /* PARANOID */
389
390	or %ebx,%ebx
391	js sqrt_near_exact_small
392
393	jnz sqrt_near_exact_large
394
395	or %ebx,%edx
396	jnz sqrt_near_exact_large
397
398	/* Our estimate is exactly the right answer */
399	xorl %eax,%eax
400	jmp sqrt_round_result
401
402	sqrt_near_exact_small:
403	/* Our estimate is too small */
404	movl $0x000000ff,%eax
405	jmp sqrt_round_result
406
407	sqrt_near_exact_large:
408	/* Our estimate is too large, we need to decrement it */
409	subl $1,%edi
410	sbbl $0,%esi
411	movl $0xffffff00,%eax
412	jmp sqrt_round_result
413
414
415	sqrt_get_more_precision:
416	/* This case is almost the same as the above, except we start
417	with an extra bit of precision in the estimate. */
418	stc /* The extra bit. */
419	rcll $1,%edi /* Shift the estimate left one bit */
420	rcll $1,%esi
421
422	movl %edi,%eax /* ls word of guess */
423	mull %edi
424	movl %edx,%ebx /* 2nd ls word of square */
425	movl %eax,%ecx /* ls word of square */
426
427	movl %edi,%eax
428	mull %esi
429	addl %eax,%ebx
430	addl %eax,%ebx
431
432	/* Put our estimate back to its original value */
433	stc /* The ms bit. */
434	rcrl $1,%esi /* Shift the estimate left one bit */
435	rcrl $1,%edi
436
437	#ifdef PARANOID
438	cmp $0xffffff60,%ebx
439	jb sqrt_more_prec_ok
440
441	cmp $0x000000a0,%ebx
442	ja sqrt_more_prec_ok
443
444	pushl EX_INTERNAL\|0x215
445	call EXCEPTION
446
447	sqrt_more_prec_ok:
448	#endif /* PARANOID */
449
450	or %ebx,%ebx
451	js sqrt_more_prec_small
452
453	jnz sqrt_more_prec_large
454
455	or %ebx,%ecx
456	jnz sqrt_more_prec_large
457
458	/* Our estimate is exactly the right answer */
459	movl $0x80000000,%eax
460	jmp sqrt_round_result
461
462	sqrt_more_prec_small:
463	/* Our estimate is too small */
464	movl $0x800000ff,%eax
465	jmp sqrt_round_result
466
467	sqrt_more_prec_large:
468	/* Our estimate is too large */
469	movl $0x7fffff00,%eax
470	jmp sqrt_round_result