arch/x86/math-emu/wm_sqrt.S

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2         .file   "wm_sqrt.S"
   3 /*---------------------------------------------------------------------------+
   4  |  wm_sqrt.S                                                                |
   5  |                                                                           |
   6  | Fixed point arithmetic square root evaluation.                            |
   7  |                                                                           |
   8  | Copyright (C) 1992,1993,1995,1997                                         |
   9  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  10  |                       Australia.  E-mail billm@suburbia.net               |
  11  |                                                                           |
  12  | Call from C as:                                                           |
  13  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
  14  |                                                                           |
  15  +---------------------------------------------------------------------------*/
  16
  17 /*---------------------------------------------------------------------------+
  18  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
  19  |    returns the square root of n in n.                                     |
  20  |                                                                           |
  21  |  Use Newton's method to compute the square root of a number, which must   |
  22  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
  23  |  Does not check the sign or tag of the argument.                          |
  24  |  Sets the exponent, but not the sign or tag of the result.                |
  25  |                                                                           |
  26  |  The guess is kept in %esi:%edi                                           |
  27  +---------------------------------------------------------------------------*/
  28
  29 #include "exception.h"
  30 #include "fpu_emu.h"
  31
  32
  33 #ifndef NON_REENTRANT_FPU
  34 /*      Local storage on the stack: */
  35 #define FPU_accum_3     -4(%ebp)        /* ms word */
  36 #define FPU_accum_2     -8(%ebp)
  37 #define FPU_accum_1     -12(%ebp)
  38 #define FPU_accum_0     -16(%ebp)
  39
  40 /*
  41  * The de-normalised argument:
  42  *                  sq_2                  sq_1              sq_0
  43  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  44  *           ^ binary point here
  45  */
  46 #define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
  47 #define FPU_fsqrt_arg_1 -24(%ebp)
  48 #define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */
  49
  50 #else
  51 /*      Local storage in a static area: */
  52 .data
  53         .align 4,0
  54 FPU_accum_3:
  55         .long   0               /* ms word */
  56 FPU_accum_2:
  57         .long   0
  58 FPU_accum_1:
  59         .long   0
  60 FPU_accum_0:
  61         .long   0
  62
  63 /* The de-normalised argument:
  64                     sq_2                  sq_1              sq_0
  65           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  66              ^ binary point here
  67  */
  68 FPU_fsqrt_arg_2:
  69         .long   0               /* ms word */
  70 FPU_fsqrt_arg_1:
  71         .long   0
  72 FPU_fsqrt_arg_0:
  73         .long   0               /* ls word, at most the ms bit is set */
  74 #endif /* NON_REENTRANT_FPU */
  75
  76
  77 .text
  78 ENTRY(wm_sqrt)
  79         pushl   %ebp
  80         movl    %esp,%ebp
  81 #ifndef NON_REENTRANT_FPU
  82         subl    $28,%esp
  83 #endif /* NON_REENTRANT_FPU */
  84         pushl   %esi
  85         pushl   %edi
  86         pushl   %ebx
  87
  88         movl    PARAM1,%esi
  89
  90         movl    SIGH(%esi),%eax
  91         movl    SIGL(%esi),%ecx
  92         xorl    %edx,%edx
  93
  94 /* We use a rough linear estimate for the first guess.. */
  95
  96         cmpw    EXP_BIAS,EXP(%esi)
  97         jnz     sqrt_arg_ge_2
  98
  99         shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
 100         rcrl    $1,%ecx
 101         rcrl    $1,%edx
 102
 103 sqrt_arg_ge_2:
 104 /* From here on, n is never accessed directly again until it is
 105    replaced by the answer. */
 106
 107         movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
 108         movl    %ecx,FPU_fsqrt_arg_1
 109         movl    %edx,FPU_fsqrt_arg_0
 110
 111 /* Make a linear first estimate */
 112         shrl    $1,%eax
 113         addl    $0x40000000,%eax
 114         movl    $0xaaaaaaaa,%ecx
 115         mull    %ecx
 116         shll    %edx                    /* max result was 7fff... */
 117         testl   $0x80000000,%edx        /* but min was 3fff... */
 118         jnz     sqrt_prelim_no_adjust
 119
 120         movl    $0x80000000,%edx        /* round up */
 121
 122 sqrt_prelim_no_adjust:
 123         movl    %edx,%esi       /* Our first guess */
 124
 125 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
 126    for a few iterations of Newton's method */
 127
 128         movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
 129
 130 /*
 131  * From our initial estimate, three iterations are enough to get us
 132  * to 30 bits or so. This will then allow two iterations at better
 133  * precision to complete the process.
 134  */
 135
 136 /* Compute  (g + n/g)/2  at each iteration (g is the guess). */
 137         shrl    %ecx            /* Doing this first will prevent a divide */
 138                                 /* overflow later. */
 139
 140         movl    %ecx,%edx       /* msw of the arg / 2 */
 141         divl    %esi            /* current estimate */
 142         shrl    %esi            /* divide by 2 */
 143         addl    %eax,%esi       /* the new estimate */
 144
 145         movl    %ecx,%edx
 146         divl    %esi
 147         shrl    %esi
 148         addl    %eax,%esi
 149
 150         movl    %ecx,%edx
 151         divl    %esi
 152         shrl    %esi
 153         addl    %eax,%esi
 154
 155 /*
 156  * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 157  * we improve it to 60 bits or so.
 158  *
 159  * The strategy from now on is to compute new estimates from
 160  *      guess := guess + (n - guess^2) / (2 * guess)
 161  */
 162
 163 /* First, find the square of the guess */
 164         movl    %esi,%eax
 165         mull    %esi
 166 /* guess^2 now in %edx:%eax */
 167
 168         movl    FPU_fsqrt_arg_1,%ecx
 169         subl    %ecx,%eax
 170         movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
 171         sbbl    %ecx,%edx
 172         jnc     sqrt_stage_2_positive
 173
 174 /* Subtraction gives a negative result,
 175    negate the result before division. */
 176         notl    %edx
 177         notl    %eax
 178         addl    $1,%eax
 179         adcl    $0,%edx
 180
 181         divl    %esi
 182         movl    %eax,%ecx
 183
 184         movl    %edx,%eax
 185         divl    %esi
 186         jmp     sqrt_stage_2_finish
 187
 188 sqrt_stage_2_positive:
 189         divl    %esi
 190         movl    %eax,%ecx
 191
 192         movl    %edx,%eax
 193         divl    %esi
 194
 195         notl    %ecx
 196         notl    %eax
 197         addl    $1,%eax
 198         adcl    $0,%ecx
 199
 200 sqrt_stage_2_finish:
 201         sarl    $1,%ecx         /* divide by 2 */
 202         rcrl    $1,%eax
 203
 204         /* Form the new estimate in %esi:%edi */
 205         movl    %eax,%edi
 206         addl    %ecx,%esi
 207
 208         jnz     sqrt_stage_2_done       /* result should be [1..2) */
 209
 210 #ifdef PARANOID
 211 /* It should be possible to get here only if the arg is ffff....ffff */
 212         cmp     $0xffffffff,FPU_fsqrt_arg_1
 213         jnz     sqrt_stage_2_error
 214 #endif /* PARANOID */
 215
 216 /* The best rounded result. */
 217         xorl    %eax,%eax
 218         decl    %eax
 219         movl    %eax,%edi
 220         movl    %eax,%esi
 221         movl    $0x7fffffff,%eax
 222         jmp     sqrt_round_result
 223
 224 #ifdef PARANOID
 225 sqrt_stage_2_error:
 226         pushl   EX_INTERNAL|0x213
 227         call    EXCEPTION
 228 #endif /* PARANOID */
 229
 230 sqrt_stage_2_done:
 231
 232 /* Now the square root has been computed to better than 60 bits. */
 233
 234 /* Find the square of the guess. */
 235         movl    %edi,%eax               /* ls word of guess */
 236         mull    %edi
 237         movl    %edx,FPU_accum_1
 238
 239         movl    %esi,%eax
 240         mull    %esi
 241         movl    %edx,FPU_accum_3
 242         movl    %eax,FPU_accum_2
 243
 244         movl    %edi,%eax
 245         mull    %esi
 246         addl    %eax,FPU_accum_1
 247         adcl    %edx,FPU_accum_2
 248         adcl    $0,FPU_accum_3
 249
 250 /*      movl    %esi,%eax */
 251 /*      mull    %edi */
 252         addl    %eax,FPU_accum_1
 253         adcl    %edx,FPU_accum_2
 254         adcl    $0,FPU_accum_3
 255
 256 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
 257
 258         movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
 259         subl    %eax,FPU_accum_1
 260         movl    FPU_fsqrt_arg_1,%eax
 261         sbbl    %eax,FPU_accum_2
 262         movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
 263         sbbl    %eax,FPU_accum_3
 264         jnc     sqrt_stage_3_positive
 265
 266 /* Subtraction gives a negative result,
 267    negate the result before division */
 268         notl    FPU_accum_1
 269         notl    FPU_accum_2
 270         notl    FPU_accum_3
 271         addl    $1,FPU_accum_1
 272         adcl    $0,FPU_accum_2
 273
 274 #ifdef PARANOID
 275         adcl    $0,FPU_accum_3  /* This must be zero */
 276         jz      sqrt_stage_3_no_error
 277
 278 sqrt_stage_3_error:
 279         pushl   EX_INTERNAL|0x207
 280         call    EXCEPTION
 281
 282 sqrt_stage_3_no_error:
 283 #endif /* PARANOID */
 284
 285         movl    FPU_accum_2,%edx
 286         movl    FPU_accum_1,%eax
 287         divl    %esi
 288         movl    %eax,%ecx
 289
 290         movl    %edx,%eax
 291         divl    %esi
 292
 293         sarl    $1,%ecx         /* divide by 2 */
 294         rcrl    $1,%eax
 295
 296         /* prepare to round the result */
 297
 298         addl    %ecx,%edi
 299         adcl    $0,%esi
 300
 301         jmp     sqrt_stage_3_finished
 302
 303 sqrt_stage_3_positive:
 304         movl    FPU_accum_2,%edx
 305         movl    FPU_accum_1,%eax
 306         divl    %esi
 307         movl    %eax,%ecx
 308
 309         movl    %edx,%eax
 310         divl    %esi
 311
 312         sarl    $1,%ecx         /* divide by 2 */
 313         rcrl    $1,%eax
 314
 315         /* prepare to round the result */
 316
 317         notl    %eax            /* Negate the correction term */
 318         notl    %ecx
 319         addl    $1,%eax
 320         adcl    $0,%ecx         /* carry here ==> correction == 0 */
 321         adcl    $0xffffffff,%esi
 322
 323         addl    %ecx,%edi
 324         adcl    $0,%esi
 325
 326 sqrt_stage_3_finished:
 327
 328 /*
 329  * The result in %esi:%edi:%esi should be good to about 90 bits here,
 330  * and the rounding information here does not have sufficient accuracy
 331  * in a few rare cases.
 332  */
 333         cmpl    $0xffffffe0,%eax
 334         ja      sqrt_near_exact_x
 335
 336         cmpl    $0x00000020,%eax
 337         jb      sqrt_near_exact
 338
 339         cmpl    $0x7fffffe0,%eax
 340         jb      sqrt_round_result
 341
 342         cmpl    $0x80000020,%eax
 343         jb      sqrt_get_more_precision
 344
 345 sqrt_round_result:
 346 /* Set up for rounding operations */
 347         movl    %eax,%edx
 348         movl    %esi,%eax
 349         movl    %edi,%ebx
 350         movl    PARAM1,%edi
 351         movw    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
 352         jmp     fpu_reg_round
 353
 354
 355 sqrt_near_exact_x:
 356 /* First, the estimate must be rounded up. */
 357         addl    $1,%edi
 358         adcl    $0,%esi
 359
 360 sqrt_near_exact:
 361 /*
 362  * This is an easy case because x^1/2 is monotonic.
 363  * We need just find the square of our estimate, compare it
 364  * with the argument, and deduce whether our estimate is
 365  * above, below, or exact. We use the fact that the estimate
 366  * is known to be accurate to about 90 bits.
 367  */
 368         movl    %edi,%eax               /* ls word of guess */
 369         mull    %edi
 370         movl    %edx,%ebx               /* 2nd ls word of square */
 371         movl    %eax,%ecx               /* ls word of square */
 372
 373         movl    %edi,%eax
 374         mull    %esi
 375         addl    %eax,%ebx
 376         addl    %eax,%ebx
 377
 378 #ifdef PARANOID
 379         cmp     $0xffffffb0,%ebx
 380         jb      sqrt_near_exact_ok
 381
 382         cmp     $0x00000050,%ebx
 383         ja      sqrt_near_exact_ok
 384
 385         pushl   EX_INTERNAL|0x214
 386         call    EXCEPTION
 387
 388 sqrt_near_exact_ok:
 389 #endif /* PARANOID */
 390
 391         or      %ebx,%ebx
 392         js      sqrt_near_exact_small
 393
 394         jnz     sqrt_near_exact_large
 395
 396         or      %ebx,%edx
 397         jnz     sqrt_near_exact_large
 398
 399 /* Our estimate is exactly the right answer */
 400         xorl    %eax,%eax
 401         jmp     sqrt_round_result
 402
 403 sqrt_near_exact_small:
 404 /* Our estimate is too small */
 405         movl    $0x000000ff,%eax
 406         jmp     sqrt_round_result
 407
 408 sqrt_near_exact_large:
 409 /* Our estimate is too large, we need to decrement it */
 410         subl    $1,%edi
 411         sbbl    $0,%esi
 412         movl    $0xffffff00,%eax
 413         jmp     sqrt_round_result
 414
 415
 416 sqrt_get_more_precision:
 417 /* This case is almost the same as the above, except we start
 418    with an extra bit of precision in the estimate. */
 419         stc                     /* The extra bit. */
 420         rcll    $1,%edi         /* Shift the estimate left one bit */
 421         rcll    $1,%esi
 422
 423         movl    %edi,%eax               /* ls word of guess */
 424         mull    %edi
 425         movl    %edx,%ebx               /* 2nd ls word of square */
 426         movl    %eax,%ecx               /* ls word of square */
 427
 428         movl    %edi,%eax
 429         mull    %esi
 430         addl    %eax,%ebx
 431         addl    %eax,%ebx
 432
 433 /* Put our estimate back to its original value */
 434         stc                     /* The ms bit. */
 435         rcrl    $1,%esi         /* Shift the estimate left one bit */
 436         rcrl    $1,%edi
 437
 438 #ifdef PARANOID
 439         cmp     $0xffffff60,%ebx
 440         jb      sqrt_more_prec_ok
 441
 442         cmp     $0x000000a0,%ebx
 443         ja      sqrt_more_prec_ok
 444
 445         pushl   EX_INTERNAL|0x215
 446         call    EXCEPTION
 447
 448 sqrt_more_prec_ok:
 449 #endif /* PARANOID */
 450
 451         or      %ebx,%ebx
 452         js      sqrt_more_prec_small
 453
 454         jnz     sqrt_more_prec_large
 455
 456         or      %ebx,%ecx
 457         jnz     sqrt_more_prec_large
 458
 459 /* Our estimate is exactly the right answer */
 460         movl    $0x80000000,%eax
 461         jmp     sqrt_round_result
 462
 463 sqrt_more_prec_small:
 464 /* Our estimate is too small */
 465         movl    $0x800000ff,%eax
 466         jmp     sqrt_round_result
 467
 468 sqrt_more_prec_large:
 469 /* Our estimate is too large */
 470         movl    $0x7fffff00,%eax
 471         jmp     sqrt_round_result
 472 ENDPROC(wm_sqrt)