]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - arch/m68k/fpsp040/ssin.S
Linux-2.6.12-rc2
[mirror_ubuntu-artful-kernel.git] / arch / m68k / fpsp040 / ssin.S
1 |
2 | ssin.sa 3.3 7/29/91
3 |
4 | The entry point sSIN computes the sine of an input argument
5 | sCOS computes the cosine, and sSINCOS computes both. The
6 | corresponding entry points with a "d" computes the same
7 | corresponding function values for denormalized inputs.
8 |
9 | Input: Double-extended number X in location pointed to
10 | by address register a0.
11 |
12 | Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
13 | COS is requested. Otherwise, for SINCOS, sin(X) is returned
14 | in Fp0, and cos(X) is returned in Fp1.
15 |
16 | Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
17 |
18 | Accuracy and Monotonicity: The returned result is within 1 ulp in
19 | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
20 | result is subsequently rounded to double precision. The
21 | result is provably monotonic in double precision.
22 |
23 | Speed: The programs sSIN and sCOS take approximately 150 cycles for
24 | input argument X such that |X| < 15Pi, which is the usual
25 | situation. The speed for sSINCOS is approximately 190 cycles.
26 |
27 | Algorithm:
28 |
29 | SIN and COS:
30 | 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
31 |
32 | 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
33 |
34 | 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
35 | k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
36 | k by k := k + AdjN.
37 |
38 | 4. If k is even, go to 6.
39 |
40 | 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
41 | where cos(r) is approximated by an even polynomial in r,
42 | 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
43 | Exit.
44 |
45 | 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
46 | where sin(r) is approximated by an odd polynomial in r
47 | r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
48 | Exit.
49 |
50 | 7. If |X| > 1, go to 9.
51 |
52 | 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
53 |
54 | 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
55 |
56 | SINCOS:
57 | 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
58 |
59 | 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
60 | k = N mod 4, so in particular, k = 0,1,2,or 3.
61 |
62 | 3. If k is even, go to 5.
63 |
64 | 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
65 | j1 exclusive or with the l.s.b. of k.
66 | sgn1 := (-1)**j1, sgn2 := (-1)**j2.
67 | SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
68 | sin(r) and cos(r) are computed as odd and even polynomials
69 | in r, respectively. Exit
70 |
71 | 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
72 | SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
73 | sin(r) and cos(r) are computed as odd and even polynomials
74 | in r, respectively. Exit
75 |
76 | 6. If |X| > 1, go to 8.
77 |
78 | 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
79 |
80 | 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
81 |
82
83 | Copyright (C) Motorola, Inc. 1990
84 | All Rights Reserved
85 |
86 | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
87 | The copyright notice above does not evidence any
88 | actual or intended publication of such source code.
89
90 |SSIN idnt 2,1 | Motorola 040 Floating Point Software Package
91
92 |section 8
93
94 #include "fpsp.h"
95
96 BOUNDS1: .long 0x3FD78000,0x4004BC7E
97 TWOBYPI: .long 0x3FE45F30,0x6DC9C883
98
99 SINA7: .long 0xBD6AAA77,0xCCC994F5
100 SINA6: .long 0x3DE61209,0x7AAE8DA1
101
102 SINA5: .long 0xBE5AE645,0x2A118AE4
103 SINA4: .long 0x3EC71DE3,0xA5341531
104
105 SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
106
107 SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000
108
109 SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
110
111 COSB8: .long 0x3D2AC4D0,0xD6011EE3
112 COSB7: .long 0xBDA9396F,0x9F45AC19
113
114 COSB6: .long 0x3E21EED9,0x0612C972
115 COSB5: .long 0xBE927E4F,0xB79D9FCF
116
117 COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
118
119 COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
120
121 COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
122 COSB1: .long 0xBF000000
123
124 INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A
125
126 TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
127 TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
128
129 |xref PITBL
130
131 .set INARG,FP_SCR4
132
133 .set X,FP_SCR5
134 .set XDCARE,X+2
135 .set XFRAC,X+4
136
137 .set RPRIME,FP_SCR1
138 .set SPRIME,FP_SCR2
139
140 .set POSNEG1,L_SCR1
141 .set TWOTO63,L_SCR1
142
143 .set ENDFLAG,L_SCR2
144 .set N,L_SCR2
145
146 .set ADJN,L_SCR3
147
148 | xref t_frcinx
149 |xref t_extdnrm
150 |xref sto_cos
151
152 .global ssind
153 ssind:
154 |--SIN(X) = X FOR DENORMALIZED X
155 bra t_extdnrm
156
157 .global scosd
158 scosd:
159 |--COS(X) = 1 FOR DENORMALIZED X
160
161 fmoves #0x3F800000,%fp0
162 |
163 | 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
164 |
165 fmovel #0,%fpsr
166 |
167 bra t_frcinx
168
169 .global ssin
170 ssin:
171 |--SET ADJN TO 0
172 movel #0,ADJN(%a6)
173 bras SINBGN
174
175 .global scos
176 scos:
177 |--SET ADJN TO 1
178 movel #1,ADJN(%a6)
179
180 SINBGN:
181 |--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
182
183 fmovex (%a0),%fp0 | ...LOAD INPUT
184
185 movel (%a0),%d0
186 movew 4(%a0),%d0
187 fmovex %fp0,X(%a6)
188 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X
189
190 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
191 bges SOK1
192 bra SINSM
193
194 SOK1:
195 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
196 blts SINMAIN
197 bra REDUCEX
198
199 SINMAIN:
200 |--THIS IS THE USUAL CASE, |X| <= 15 PI.
201 |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
202 fmovex %fp0,%fp1
203 fmuld TWOBYPI,%fp1 | ...X*2/PI
204
205 |--HIDE THE NEXT THREE INSTRUCTIONS
206 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
207
208
209 |--FP1 IS NOW READY
210 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER
211
212 movel N(%a6),%d0
213 asll #4,%d0
214 addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2
215 | ...WHICH IS IN TWO PIECES Y1 & Y2
216
217 fsubx (%a1)+,%fp0 | ...X-Y1
218 |--HIDE THE NEXT ONE
219 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
220
221 SINCONT:
222 |--continuation from REDUCEX
223
224 |--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
225 movel N(%a6),%d0
226 addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN
227 rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE
228 cmpil #0,%d0
229 blt COSPOLY
230
231 SINPOLY:
232 |--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
233 |--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
234 |--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
235 |--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
236 |--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
237 |--WHERE T=S*S.
238 |--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
239 |--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
240 fmovex %fp0,X(%a6) | ...X IS R
241 fmulx %fp0,%fp0 | ...FP0 IS S
242 |---HIDE THE NEXT TWO WHILE WAITING FOR FP0
243 fmoved SINA7,%fp3
244 fmoved SINA6,%fp2
245 |--FP0 IS NOW READY
246 fmovex %fp0,%fp1
247 fmulx %fp1,%fp1 | ...FP1 IS T
248 |--HIDE THE NEXT TWO WHILE WAITING FOR FP1
249
250 rorl #1,%d0
251 andil #0x80000000,%d0
252 | ...LEAST SIG. BIT OF D0 IN SIGN POSITION
253 eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R
254
255 fmulx %fp1,%fp3 | ...TA7
256 fmulx %fp1,%fp2 | ...TA6
257
258 faddd SINA5,%fp3 | ...A5+TA7
259 faddd SINA4,%fp2 | ...A4+TA6
260
261 fmulx %fp1,%fp3 | ...T(A5+TA7)
262 fmulx %fp1,%fp2 | ...T(A4+TA6)
263
264 faddd SINA3,%fp3 | ...A3+T(A5+TA7)
265 faddx SINA2,%fp2 | ...A2+T(A4+TA6)
266
267 fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7))
268
269 fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6))
270 faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7))
271 fmulx X(%a6),%fp0 | ...R'*S
272
273 faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
274 |--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
275 |--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
276
277
278 fmulx %fp1,%fp0 | ...SIN(R')-R'
279 |--FP1 RELEASED.
280
281 fmovel %d1,%FPCR |restore users exceptions
282 faddx X(%a6),%fp0 |last inst - possible exception set
283 bra t_frcinx
284
285
286 COSPOLY:
287 |--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
288 |--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
289 |--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
290 |--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
291 |--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
292 |--WHERE T=S*S.
293 |--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
294 |--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
295 |--AND IS THEREFORE STORED AS SINGLE PRECISION.
296
297 fmulx %fp0,%fp0 | ...FP0 IS S
298 |---HIDE THE NEXT TWO WHILE WAITING FOR FP0
299 fmoved COSB8,%fp2
300 fmoved COSB7,%fp3
301 |--FP0 IS NOW READY
302 fmovex %fp0,%fp1
303 fmulx %fp1,%fp1 | ...FP1 IS T
304 |--HIDE THE NEXT TWO WHILE WAITING FOR FP1
305 fmovex %fp0,X(%a6) | ...X IS S
306 rorl #1,%d0
307 andil #0x80000000,%d0
308 | ...LEAST SIG. BIT OF D0 IN SIGN POSITION
309
310 fmulx %fp1,%fp2 | ...TB8
311 |--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
312 eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S
313 andil #0x80000000,%d0
314
315 fmulx %fp1,%fp3 | ...TB7
316 |--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
317 oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE
318 movel %d0,POSNEG1(%a6)
319
320 faddd COSB6,%fp2 | ...B6+TB8
321 faddd COSB5,%fp3 | ...B5+TB7
322
323 fmulx %fp1,%fp2 | ...T(B6+TB8)
324 fmulx %fp1,%fp3 | ...T(B5+TB7)
325
326 faddd COSB4,%fp2 | ...B4+T(B6+TB8)
327 faddx COSB3,%fp3 | ...B3+T(B5+TB7)
328
329 fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8))
330 fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7))
331
332 faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8))
333 fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7))
334
335 fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8)))
336 |--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
337 |--FP2 RELEASED.
338
339
340 faddx %fp1,%fp0
341 |--FP1 RELEASED
342
343 fmulx X(%a6),%fp0
344
345 fmovel %d1,%FPCR |restore users exceptions
346 fadds POSNEG1(%a6),%fp0 |last inst - possible exception set
347 bra t_frcinx
348
349
350 SINBORS:
351 |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
352 |--IF |X| < 2**(-40), RETURN X OR 1.
353 cmpil #0x3FFF8000,%d0
354 bgts REDUCEX
355
356
357 SINSM:
358 movel ADJN(%a6),%d0
359 cmpil #0,%d0
360 bgts COSTINY
361
362 SINTINY:
363 movew #0x0000,XDCARE(%a6) | ...JUST IN CASE
364 fmovel %d1,%FPCR |restore users exceptions
365 fmovex X(%a6),%fp0 |last inst - possible exception set
366 bra t_frcinx
367
368
369 COSTINY:
370 fmoves #0x3F800000,%fp0
371
372 fmovel %d1,%FPCR |restore users exceptions
373 fsubs #0x00800000,%fp0 |last inst - possible exception set
374 bra t_frcinx
375
376
377 REDUCEX:
378 |--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
379 |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
380 |--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
381
382 fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
383 movel %d2,-(%a7)
384 fmoves #0x00000000,%fp1
385 |--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
386 |--there is a danger of unwanted overflow in first LOOP iteration. In this
387 |--case, reduce argument by one remainder step to make subsequent reduction
388 |--safe.
389 cmpil #0x7ffeffff,%d0 |is argument dangerously large?
390 bnes LOOP
391 movel #0x7ffe0000,FP_SCR2(%a6) |yes
392 | ;create 2**16383*PI/2
393 movel #0xc90fdaa2,FP_SCR2+4(%a6)
394 clrl FP_SCR2+8(%a6)
395 ftstx %fp0 |test sign of argument
396 movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
397 | ;PI/2 at FP_SCR3
398 movel #0x85a308d3,FP_SCR3+4(%a6)
399 clrl FP_SCR3+8(%a6)
400 fblt red_neg
401 orw #0x8000,FP_SCR2(%a6) |positive arg
402 orw #0x8000,FP_SCR3(%a6)
403 red_neg:
404 faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact
405 fmovex %fp0,%fp1 |save high result in fp1
406 faddx FP_SCR3(%a6),%fp0 |low part of reduction
407 fsubx %fp0,%fp1 |determine low component of result
408 faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
409
410 |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
411 |--integer quotient will be stored in N
412 |--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
413
414 LOOP:
415 fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
416 movew INARG(%a6),%d0
417 movel %d0,%a1 | ...save a copy of D0
418 andil #0x00007FFF,%d0
419 subil #0x00003FFF,%d0 | ...D0 IS K
420 cmpil #28,%d0
421 bles LASTLOOP
422 CONTLOOP:
423 subil #27,%d0 | ...D0 IS L := K-27
424 movel #0,ENDFLAG(%a6)
425 bras WORK
426 LASTLOOP:
427 clrl %d0 | ...D0 IS L := 0
428 movel #1,ENDFLAG(%a6)
429
430 WORK:
431 |--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
432 |--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
433
434 |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
435 |--2**L * (PIby2_1), 2**L * (PIby2_2)
436
437 movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
438 subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)
439
440 movel #0xA2F9836E,FP_SCR1+4(%a6)
441 movel #0x4E44152A,FP_SCR1+8(%a6)
442 movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)
443
444 fmovex %fp0,%fp2
445 fmulx FP_SCR1(%a6),%fp2
446 |--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
447 |--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
448 |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
449 |--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
450 |--US THE DESIRED VALUE IN FLOATING POINT.
451
452 |--HIDE SIX CYCLES OF INSTRUCTION
453 movel %a1,%d2
454 swap %d2
455 andil #0x80000000,%d2
456 oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
457 movel %d2,TWOTO63(%a6)
458
459 movel %d0,%d2
460 addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
461
462 |--FP2 IS READY
463 fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED
464
465 |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
466 movew %d2,FP_SCR2(%a6)
467 clrw FP_SCR2+2(%a6)
468 movel #0xC90FDAA2,FP_SCR2+4(%a6)
469 clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1
470
471 |--FP2 IS READY
472 fsubs TWOTO63(%a6),%fp2 | ...FP2 is N
473
474 addil #0x00003FDD,%d0
475 movew %d0,FP_SCR3(%a6)
476 clrw FP_SCR3+2(%a6)
477 movel #0x85A308D3,FP_SCR3+4(%a6)
478 clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2
479
480 movel ENDFLAG(%a6),%d0
481
482 |--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
483 |--P2 = 2**(L) * Piby2_2
484 fmovex %fp2,%fp4
485 fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
486 fmovex %fp2,%fp5
487 fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
488 fmovex %fp4,%fp3
489 |--we want P+p = W+w but |p| <= half ulp of P
490 |--Then, we need to compute A := R-P and a := r-p
491 faddx %fp5,%fp3 | ...FP3 is P
492 fsubx %fp3,%fp4 | ...W-P
493
494 fsubx %fp3,%fp0 | ...FP0 is A := R - P
495 faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w
496
497 fmovex %fp0,%fp3 | ...FP3 A
498 fsubx %fp4,%fp1 | ...FP1 is a := r - p
499
500 |--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
501 |--|r| <= half ulp of R.
502 faddx %fp1,%fp0 | ...FP0 is R := A+a
503 |--No need to calculate r if this is the last loop
504 cmpil #0,%d0
505 bgt RESTORE
506
507 |--Need to calculate r
508 fsubx %fp0,%fp3 | ...A-R
509 faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
510 bra LOOP
511
512 RESTORE:
513 fmovel %fp2,N(%a6)
514 movel (%a7)+,%d2
515 fmovemx (%a7)+,%fp2-%fp5
516
517
518 movel ADJN(%a6),%d0
519 cmpil #4,%d0
520
521 blt SINCONT
522 bras SCCONT
523
524 .global ssincosd
525 ssincosd:
526 |--SIN AND COS OF X FOR DENORMALIZED X
527
528 fmoves #0x3F800000,%fp1
529 bsr sto_cos |store cosine result
530 bra t_extdnrm
531
532 .global ssincos
533 ssincos:
534 |--SET ADJN TO 4
535 movel #4,ADJN(%a6)
536
537 fmovex (%a0),%fp0 | ...LOAD INPUT
538
539 movel (%a0),%d0
540 movew 4(%a0),%d0
541 fmovex %fp0,X(%a6)
542 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X
543
544 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
545 bges SCOK1
546 bra SCSM
547
548 SCOK1:
549 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
550 blts SCMAIN
551 bra REDUCEX
552
553
554 SCMAIN:
555 |--THIS IS THE USUAL CASE, |X| <= 15 PI.
556 |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
557 fmovex %fp0,%fp1
558 fmuld TWOBYPI,%fp1 | ...X*2/PI
559
560 |--HIDE THE NEXT THREE INSTRUCTIONS
561 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
562
563
564 |--FP1 IS NOW READY
565 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER
566
567 movel N(%a6),%d0
568 asll #4,%d0
569 addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2
570
571 fsubx (%a1)+,%fp0 | ...X-Y1
572 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
573
574 SCCONT:
575 |--continuation point from REDUCEX
576
577 |--HIDE THE NEXT TWO
578 movel N(%a6),%d0
579 rorl #1,%d0
580
581 cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD
582 bge NEVEN
583
584 NODD:
585 |--REGISTERS SAVED SO FAR: D0, A0, FP2.
586
587 fmovex %fp0,RPRIME(%a6)
588 fmulx %fp0,%fp0 | ...FP0 IS S = R*R
589 fmoved SINA7,%fp1 | ...A7
590 fmoved COSB8,%fp2 | ...B8
591 fmulx %fp0,%fp1 | ...SA7
592 movel %d2,-(%a7)
593 movel %d0,%d2
594 fmulx %fp0,%fp2 | ...SB8
595 rorl #1,%d2
596 andil #0x80000000,%d2
597
598 faddd SINA6,%fp1 | ...A6+SA7
599 eorl %d0,%d2
600 andil #0x80000000,%d2
601 faddd COSB7,%fp2 | ...B7+SB8
602
603 fmulx %fp0,%fp1 | ...S(A6+SA7)
604 eorl %d2,RPRIME(%a6)
605 movel (%a7)+,%d2
606 fmulx %fp0,%fp2 | ...S(B7+SB8)
607 rorl #1,%d0
608 andil #0x80000000,%d0
609
610 faddd SINA5,%fp1 | ...A5+S(A6+SA7)
611 movel #0x3F800000,POSNEG1(%a6)
612 eorl %d0,POSNEG1(%a6)
613 faddd COSB6,%fp2 | ...B6+S(B7+SB8)
614
615 fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7))
616 fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8))
617 fmovex %fp0,SPRIME(%a6)
618
619 faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7))
620 eorl %d0,SPRIME(%a6)
621 faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8))
622
623 fmulx %fp0,%fp1 | ...S(A4+...)
624 fmulx %fp0,%fp2 | ...S(B5+...)
625
626 faddd SINA3,%fp1 | ...A3+S(A4+...)
627 faddd COSB4,%fp2 | ...B4+S(B5+...)
628
629 fmulx %fp0,%fp1 | ...S(A3+...)
630 fmulx %fp0,%fp2 | ...S(B4+...)
631
632 faddx SINA2,%fp1 | ...A2+S(A3+...)
633 faddx COSB3,%fp2 | ...B3+S(B4+...)
634
635 fmulx %fp0,%fp1 | ...S(A2+...)
636 fmulx %fp0,%fp2 | ...S(B3+...)
637
638 faddx SINA1,%fp1 | ...A1+S(A2+...)
639 faddx COSB2,%fp2 | ...B2+S(B3+...)
640
641 fmulx %fp0,%fp1 | ...S(A1+...)
642 fmulx %fp2,%fp0 | ...S(B2+...)
643
644
645
646 fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...)
647 fadds COSB1,%fp0 | ...B1+S(B2...)
648 fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...))
649
650 movel %d1,-(%sp) |restore users mode & precision
651 andil #0xff,%d1 |mask off all exceptions
652 fmovel %d1,%FPCR
653 faddx RPRIME(%a6),%fp1 | ...COS(X)
654 bsr sto_cos |store cosine result
655 fmovel (%sp)+,%FPCR |restore users exceptions
656 fadds POSNEG1(%a6),%fp0 | ...SIN(X)
657
658 bra t_frcinx
659
660
661 NEVEN:
662 |--REGISTERS SAVED SO FAR: FP2.
663
664 fmovex %fp0,RPRIME(%a6)
665 fmulx %fp0,%fp0 | ...FP0 IS S = R*R
666 fmoved COSB8,%fp1 | ...B8
667 fmoved SINA7,%fp2 | ...A7
668 fmulx %fp0,%fp1 | ...SB8
669 fmovex %fp0,SPRIME(%a6)
670 fmulx %fp0,%fp2 | ...SA7
671 rorl #1,%d0
672 andil #0x80000000,%d0
673 faddd COSB7,%fp1 | ...B7+SB8
674 faddd SINA6,%fp2 | ...A6+SA7
675 eorl %d0,RPRIME(%a6)
676 eorl %d0,SPRIME(%a6)
677 fmulx %fp0,%fp1 | ...S(B7+SB8)
678 oril #0x3F800000,%d0
679 movel %d0,POSNEG1(%a6)
680 fmulx %fp0,%fp2 | ...S(A6+SA7)
681
682 faddd COSB6,%fp1 | ...B6+S(B7+SB8)
683 faddd SINA5,%fp2 | ...A5+S(A6+SA7)
684
685 fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8))
686 fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7))
687
688 faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8))
689 faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7))
690
691 fmulx %fp0,%fp1 | ...S(B5+...)
692 fmulx %fp0,%fp2 | ...S(A4+...)
693
694 faddd COSB4,%fp1 | ...B4+S(B5+...)
695 faddd SINA3,%fp2 | ...A3+S(A4+...)
696
697 fmulx %fp0,%fp1 | ...S(B4+...)
698 fmulx %fp0,%fp2 | ...S(A3+...)
699
700 faddx COSB3,%fp1 | ...B3+S(B4+...)
701 faddx SINA2,%fp2 | ...A2+S(A3+...)
702
703 fmulx %fp0,%fp1 | ...S(B3+...)
704 fmulx %fp0,%fp2 | ...S(A2+...)
705
706 faddx COSB2,%fp1 | ...B2+S(B3+...)
707 faddx SINA1,%fp2 | ...A1+S(A2+...)
708
709 fmulx %fp0,%fp1 | ...S(B2+...)
710 fmulx %fp2,%fp0 | ...s(a1+...)
711
712
713
714 fadds COSB1,%fp1 | ...B1+S(B2...)
715 fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...)
716 fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...))
717
718 movel %d1,-(%sp) |save users mode & precision
719 andil #0xff,%d1 |mask off all exceptions
720 fmovel %d1,%FPCR
721 fadds POSNEG1(%a6),%fp1 | ...COS(X)
722 bsr sto_cos |store cosine result
723 fmovel (%sp)+,%FPCR |restore users exceptions
724 faddx RPRIME(%a6),%fp0 | ...SIN(X)
725
726 bra t_frcinx
727
728 SCBORS:
729 cmpil #0x3FFF8000,%d0
730 bgt REDUCEX
731
732
733 SCSM:
734 movew #0x0000,XDCARE(%a6)
735 fmoves #0x3F800000,%fp1
736
737 movel %d1,-(%sp) |save users mode & precision
738 andil #0xff,%d1 |mask off all exceptions
739 fmovel %d1,%FPCR
740 fsubs #0x00800000,%fp1
741 bsr sto_cos |store cosine result
742 fmovel (%sp)+,%FPCR |restore users exceptions
743 fmovex X(%a6),%fp0
744 bra t_frcinx
745
746 |end