]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | | |
2 | | slogn.sa 3.1 12/10/90 | |
3 | | | |
4 | | slogn computes the natural logarithm of an | |
5 | | input value. slognd does the same except the input value is a | |
6 | | denormalized number. slognp1 computes log(1+X), and slognp1d | |
7 | | computes log(1+X) for denormalized X. | |
8 | | | |
9 | | Input: Double-extended value in memory location pointed to by address | |
10 | | register a0. | |
11 | | | |
12 | | Output: log(X) or log(1+X) returned in floating-point register Fp0. | |
13 | | | |
14 | | Accuracy and Monotonicity: The returned result is within 2 ulps in | |
15 | | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the | |
16 | | result is subsequently rounded to double precision. The | |
17 | | result is provably monotonic in double precision. | |
18 | | | |
19 | | Speed: The program slogn takes approximately 190 cycles for input | |
20 | | argument X such that |X-1| >= 1/16, which is the usual | |
21 | | situation. For those arguments, slognp1 takes approximately | |
22 | | 210 cycles. For the less common arguments, the program will | |
23 | | run no worse than 10% slower. | |
24 | | | |
25 | | Algorithm: | |
26 | | LOGN: | |
27 | | Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in | |
28 | | u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. | |
29 | | | |
30 | | Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven | |
31 | | significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base | |
32 | | 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). | |
33 | | | |
34 | | Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, | |
35 | | log(1+u) = poly. | |
36 | | | |
37 | | Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) | |
38 | | by k*log(2) + (log(F) + poly). The values of log(F) are calculated | |
39 | | beforehand and stored in the program. | |
40 | | | |
41 | | lognp1: | |
42 | | Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in | |
43 | | u where u = 2X/(2+X). Otherwise, move on to Step 2. | |
44 | | | |
45 | | Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 | |
46 | | of the algorithm for LOGN and compute log(1+X) as | |
47 | | k*log(2) + log(F) + poly where poly approximates log(1+u), | |
48 | | u = (Y-F)/F. | |
49 | | | |
50 | | Implementation Notes: | |
51 | | Note 1. There are 64 different possible values for F, thus 64 log(F)'s | |
52 | | need to be tabulated. Moreover, the values of 1/F are also | |
53 | | tabulated so that the division in (Y-F)/F can be performed by a | |
54 | | multiplication. | |
55 | | | |
56 | | Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value | |
57 | | Y-F has to be calculated carefully when 1/2 <= X < 3/2. | |
58 | | | |
59 | | Note 3. To fully exploit the pipeline, polynomials are usually separated | |
60 | | into two parts evaluated independently before being added up. | |
61 | | | |
62 | ||
63 | | Copyright (C) Motorola, Inc. 1990 | |
64 | | All Rights Reserved | |
65 | | | |
e00d82d0 MW |
66 | | For details on the license for this file, please see the |
67 | | file, README, in this same directory. | |
1da177e4 LT |
68 | |
69 | |slogn idnt 2,1 | Motorola 040 Floating Point Software Package | |
70 | ||
71 | |section 8 | |
72 | ||
73 | #include "fpsp.h" | |
74 | ||
75 | BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 | |
76 | BOUNDS2: .long 0x3FFE8000,0x3FFFC000 | |
77 | ||
78 | LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 | |
79 | ||
80 | one: .long 0x3F800000 | |
81 | zero: .long 0x00000000 | |
82 | infty: .long 0x7F800000 | |
83 | negone: .long 0xBF800000 | |
84 | ||
85 | LOGA6: .long 0x3FC2499A,0xB5E4040B | |
86 | LOGA5: .long 0xBFC555B5,0x848CB7DB | |
87 | ||
88 | LOGA4: .long 0x3FC99999,0x987D8730 | |
89 | LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 | |
90 | ||
91 | LOGA2: .long 0x3FD55555,0x555555a4 | |
92 | LOGA1: .long 0xBFE00000,0x00000008 | |
93 | ||
94 | LOGB5: .long 0x3F175496,0xADD7DAD6 | |
95 | LOGB4: .long 0x3F3C71C2,0xFE80C7E0 | |
96 | ||
97 | LOGB3: .long 0x3F624924,0x928BCCFF | |
98 | LOGB2: .long 0x3F899999,0x999995EC | |
99 | ||
100 | LOGB1: .long 0x3FB55555,0x55555555 | |
101 | TWO: .long 0x40000000,0x00000000 | |
102 | ||
103 | LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 | |
104 | ||
105 | LOGTBL: | |
106 | .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 | |
107 | .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 | |
108 | .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 | |
109 | .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 | |
110 | .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 | |
111 | .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 | |
112 | .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 | |
113 | .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 | |
114 | .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 | |
115 | .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 | |
116 | .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 | |
117 | .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 | |
118 | .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 | |
119 | .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 | |
120 | .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 | |
121 | .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 | |
122 | .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 | |
123 | .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 | |
124 | .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 | |
125 | .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 | |
126 | .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 | |
127 | .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 | |
128 | .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 | |
129 | .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 | |
130 | .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 | |
131 | .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 | |
132 | .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 | |
133 | .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 | |
134 | .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 | |
135 | .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 | |
136 | .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 | |
137 | .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 | |
138 | .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 | |
139 | .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 | |
140 | .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 | |
141 | .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 | |
142 | .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 | |
143 | .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 | |
144 | .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 | |
145 | .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 | |
146 | .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 | |
147 | .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 | |
148 | .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 | |
149 | .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 | |
150 | .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 | |
151 | .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 | |
152 | .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 | |
153 | .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 | |
154 | .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 | |
155 | .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 | |
156 | .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 | |
157 | .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 | |
158 | .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 | |
159 | .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 | |
160 | .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 | |
161 | .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 | |
162 | .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 | |
163 | .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 | |
164 | .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 | |
165 | .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 | |
166 | .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 | |
167 | .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 | |
168 | .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 | |
169 | .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 | |
170 | .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 | |
171 | .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 | |
172 | .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 | |
173 | .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 | |
174 | .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 | |
175 | .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 | |
176 | .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 | |
177 | .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 | |
178 | .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 | |
179 | .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 | |
180 | .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 | |
181 | .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 | |
182 | .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 | |
183 | .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 | |
184 | .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 | |
185 | .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 | |
186 | .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 | |
187 | .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 | |
188 | .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 | |
189 | .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 | |
190 | .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 | |
191 | .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 | |
192 | .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 | |
193 | .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 | |
194 | .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 | |
195 | .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 | |
196 | .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 | |
197 | .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 | |
198 | .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 | |
199 | .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 | |
200 | .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 | |
201 | .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 | |
202 | .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 | |
203 | .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 | |
204 | .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 | |
205 | .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 | |
206 | .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 | |
207 | .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 | |
208 | .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 | |
209 | .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 | |
210 | .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 | |
211 | .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 | |
212 | .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 | |
213 | .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 | |
214 | .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 | |
215 | .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 | |
216 | .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 | |
217 | .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 | |
218 | .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 | |
219 | .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 | |
220 | .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 | |
221 | .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 | |
222 | .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 | |
223 | .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 | |
224 | .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 | |
225 | .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 | |
226 | .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 | |
227 | .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 | |
228 | .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 | |
229 | .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 | |
230 | .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 | |
231 | .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 | |
232 | .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 | |
233 | .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 | |
234 | ||
235 | .set ADJK,L_SCR1 | |
236 | ||
237 | .set X,FP_SCR1 | |
238 | .set XDCARE,X+2 | |
239 | .set XFRAC,X+4 | |
240 | ||
241 | .set F,FP_SCR2 | |
242 | .set FFRAC,F+4 | |
243 | ||
244 | .set KLOG2,FP_SCR3 | |
245 | ||
246 | .set SAVEU,FP_SCR4 | |
247 | ||
248 | | xref t_frcinx | |
249 | |xref t_extdnrm | |
250 | |xref t_operr | |
251 | |xref t_dz | |
252 | ||
253 | .global slognd | |
254 | slognd: | |
255 | |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT | |
256 | ||
257 | movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 | |
258 | ||
259 | |----normalize the input value by left shifting k bits (k to be determined | |
260 | |----below), adjusting exponent and storing -k to ADJK | |
261 | |----the value TWOTO100 is no longer needed. | |
262 | |----Note that this code assumes the denormalized input is NON-ZERO. | |
263 | ||
264 | moveml %d2-%d7,-(%a7) | ...save some registers | |
265 | movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. # | |
266 | movel 4(%a0),%d4 | |
267 | movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X) | |
268 | clrl %d2 | ...D2 used for holding K | |
269 | ||
270 | tstl %d4 | |
271 | bnes HiX_not0 | |
272 | ||
273 | HiX_0: | |
274 | movel %d5,%d4 | |
275 | clrl %d5 | |
276 | movel #32,%d2 | |
277 | clrl %d6 | |
278 | bfffo %d4{#0:#32},%d6 | |
279 | lsll %d6,%d4 | |
280 | addl %d6,%d2 | ...(D3,D4,D5) is normalized | |
281 | ||
282 | movel %d3,X(%a6) | |
283 | movel %d4,XFRAC(%a6) | |
284 | movel %d5,XFRAC+4(%a6) | |
285 | negl %d2 | |
286 | movel %d2,ADJK(%a6) | |
287 | fmovex X(%a6),%fp0 | |
288 | moveml (%a7)+,%d2-%d7 | ...restore registers | |
289 | lea X(%a6),%a0 | |
290 | bras LOGBGN | ...begin regular log(X) | |
291 | ||
292 | ||
293 | HiX_not0: | |
294 | clrl %d6 | |
295 | bfffo %d4{#0:#32},%d6 | ...find first 1 | |
296 | movel %d6,%d2 | ...get k | |
297 | lsll %d6,%d4 | |
298 | movel %d5,%d7 | ...a copy of D5 | |
299 | lsll %d6,%d5 | |
300 | negl %d6 | |
301 | addil #32,%d6 | |
302 | lsrl %d6,%d7 | |
303 | orl %d7,%d4 | ...(D3,D4,D5) normalized | |
304 | ||
305 | movel %d3,X(%a6) | |
306 | movel %d4,XFRAC(%a6) | |
307 | movel %d5,XFRAC+4(%a6) | |
308 | negl %d2 | |
309 | movel %d2,ADJK(%a6) | |
310 | fmovex X(%a6),%fp0 | |
311 | moveml (%a7)+,%d2-%d7 | ...restore registers | |
312 | lea X(%a6),%a0 | |
313 | bras LOGBGN | ...begin regular log(X) | |
314 | ||
315 | ||
316 | .global slogn | |
317 | slogn: | |
318 | |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S | |
319 | ||
320 | fmovex (%a0),%fp0 | ...LOAD INPUT | |
321 | movel #0x00000000,ADJK(%a6) | |
322 | ||
323 | LOGBGN: | |
324 | |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS | |
325 | |--A FINITE, NON-ZERO, NORMALIZED NUMBER. | |
326 | ||
327 | movel (%a0),%d0 | |
328 | movew 4(%a0),%d0 | |
329 | ||
330 | movel (%a0),X(%a6) | |
331 | movel 4(%a0),X+4(%a6) | |
332 | movel 8(%a0),X+8(%a6) | |
333 | ||
334 | cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE | |
335 | blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID | |
336 | cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1 | |
337 | bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16] | |
338 | ||
339 | LOGMAIN: | |
340 | |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 | |
341 | ||
342 | |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. | |
343 | |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. | |
344 | |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) | |
345 | |-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). | |
346 | |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING | |
347 | |--LOG(1+U) CAN BE VERY EFFICIENT. | |
348 | |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO | |
349 | |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. | |
350 | ||
351 | |--GET K, Y, F, AND ADDRESS OF 1/F. | |
352 | asrl #8,%d0 | |
353 | asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X | |
354 | subil #0x3FFF,%d0 | ...THIS IS K | |
355 | addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. | |
356 | lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F) | |
357 | fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT | |
358 | ||
359 | |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F | |
360 | movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X | |
361 | movel XFRAC(%a6),FFRAC(%a6) | |
362 | andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y | |
363 | oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT | |
364 | movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F | |
365 | andil #0x7E000000,%d0 | |
366 | asrl #8,%d0 | |
367 | asrl #8,%d0 | |
368 | asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT | |
369 | addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F | |
370 | ||
371 | fmovex X(%a6),%fp0 | |
372 | movel #0x3fff0000,F(%a6) | |
373 | clrl F+8(%a6) | |
374 | fsubx F(%a6),%fp0 | ...Y-F | |
375 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY | |
376 | |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K | |
377 | |--REGISTERS SAVED: FPCR, FP1, FP2 | |
378 | ||
379 | LP1CONT1: | |
380 | |--AN RE-ENTRY POINT FOR LOGNP1 | |
381 | fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F | |
382 | fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY | |
383 | fmovex %fp0,%fp2 | |
384 | fmulx %fp2,%fp2 | ...FP2 IS V=U*U | |
385 | fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1 | |
386 | ||
387 | |--LOG(1+U) IS APPROXIMATED BY | |
388 | |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS | |
389 | |--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] | |
390 | ||
391 | fmovex %fp2,%fp3 | |
392 | fmovex %fp2,%fp1 | |
393 | ||
394 | fmuld LOGA6,%fp1 | ...V*A6 | |
395 | fmuld LOGA5,%fp2 | ...V*A5 | |
396 | ||
397 | faddd LOGA4,%fp1 | ...A4+V*A6 | |
398 | faddd LOGA3,%fp2 | ...A3+V*A5 | |
399 | ||
400 | fmulx %fp3,%fp1 | ...V*(A4+V*A6) | |
401 | fmulx %fp3,%fp2 | ...V*(A3+V*A5) | |
402 | ||
403 | faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6) | |
404 | faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5) | |
405 | ||
406 | fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6)) | |
407 | addal #16,%a0 | ...ADDRESS OF LOG(F) | |
408 | fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED | |
409 | ||
410 | fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6)) | |
411 | faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED | |
412 | ||
413 | faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6)) | |
414 | fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 | |
415 | faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U) | |
416 | ||
417 | fmovel %d1,%fpcr | |
418 | faddx KLOG2(%a6),%fp0 | ...FINAL ADD | |
419 | bra t_frcinx | |
420 | ||
421 | ||
422 | LOGNEAR1: | |
423 | |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. | |
424 | fmovex %fp0,%fp1 | |
425 | fsubs one,%fp1 | ...FP1 IS X-1 | |
426 | fadds one,%fp0 | ...FP0 IS X+1 | |
427 | faddx %fp1,%fp1 | ...FP1 IS 2(X-1) | |
428 | |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL | |
429 | |--IN U, U = 2(X-1)/(X+1) = FP1/FP0 | |
430 | ||
431 | LP1CONT2: | |
432 | |--THIS IS AN RE-ENTRY POINT FOR LOGNP1 | |
433 | fdivx %fp0,%fp1 | ...FP1 IS U | |
434 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 | |
435 | |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 | |
436 | |--LET V=U*U, W=V*V, CALCULATE | |
437 | |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY | |
438 | |--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) | |
439 | fmovex %fp1,%fp0 | |
440 | fmulx %fp0,%fp0 | ...FP0 IS V | |
441 | fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1 | |
442 | fmovex %fp0,%fp1 | |
443 | fmulx %fp1,%fp1 | ...FP1 IS W | |
444 | ||
445 | fmoved LOGB5,%fp3 | |
446 | fmoved LOGB4,%fp2 | |
447 | ||
448 | fmulx %fp1,%fp3 | ...W*B5 | |
449 | fmulx %fp1,%fp2 | ...W*B4 | |
450 | ||
451 | faddd LOGB3,%fp3 | ...B3+W*B5 | |
452 | faddd LOGB2,%fp2 | ...B2+W*B4 | |
453 | ||
454 | fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED | |
455 | ||
456 | fmulx %fp0,%fp2 | ...V*(B2+W*B4) | |
457 | ||
458 | faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5) | |
459 | fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V | |
460 | ||
461 | faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED | |
462 | fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED | |
463 | ||
464 | fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) | |
465 | ||
466 | fmovel %d1,%fpcr | |
467 | faddx SAVEU(%a6),%fp0 | |
468 | bra t_frcinx | |
469 | rts | |
470 | ||
471 | LOGNEG: | |
472 | |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID | |
473 | bra t_operr | |
474 | ||
475 | .global slognp1d | |
476 | slognp1d: | |
477 | |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT | |
478 | | Simply return the denorm | |
479 | ||
480 | bra t_extdnrm | |
481 | ||
482 | .global slognp1 | |
483 | slognp1: | |
484 | |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S | |
485 | ||
486 | fmovex (%a0),%fp0 | ...LOAD INPUT | |
487 | fabsx %fp0 |test magnitude | |
488 | fcmpx LTHOLD,%fp0 |compare with min threshold | |
489 | fbgt LP1REAL |if greater, continue | |
490 | fmovel #0,%fpsr |clr N flag from compare | |
491 | fmovel %d1,%fpcr | |
492 | fmovex (%a0),%fp0 |return signed argument | |
493 | bra t_frcinx | |
494 | ||
495 | LP1REAL: | |
496 | fmovex (%a0),%fp0 | ...LOAD INPUT | |
497 | movel #0x00000000,ADJK(%a6) | |
498 | fmovex %fp0,%fp1 | ...FP1 IS INPUT Z | |
499 | fadds one,%fp0 | ...X := ROUND(1+Z) | |
500 | fmovex %fp0,X(%a6) | |
501 | movew XFRAC(%a6),XDCARE(%a6) | |
502 | movel X(%a6),%d0 | |
503 | cmpil #0,%d0 | |
504 | ble LP1NEG0 | ...LOG OF ZERO OR -VE | |
505 | cmp2l BOUNDS2,%d0 | |
506 | bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2] | |
507 | |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, | |
508 | |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, | |
509 | |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). | |
510 | ||
511 | LP1NEAR1: | |
512 | |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) | |
513 | cmp2l BOUNDS1,%d0 | |
514 | bcss LP1CARE | |
515 | ||
516 | LP1ONE16: | |
517 | |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) | |
518 | |--WHERE U = 2Z/(2+Z) = 2Z/(1+X). | |
519 | faddx %fp1,%fp1 | ...FP1 IS 2Z | |
520 | fadds one,%fp0 | ...FP0 IS 1+X | |
521 | |--U = FP1/FP0 | |
522 | bra LP1CONT2 | |
523 | ||
524 | LP1CARE: | |
525 | |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE | |
526 | |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST | |
527 | |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], | |
528 | |--THERE ARE ONLY TWO CASES. | |
529 | |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z | |
530 | |--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z | |
531 | |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF | |
532 | |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. | |
533 | ||
534 | movel XFRAC(%a6),FFRAC(%a6) | |
535 | andil #0xFE000000,FFRAC(%a6) | |
536 | oril #0x01000000,FFRAC(%a6) | ...F OBTAINED | |
537 | cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 | |
538 | bges KISZERO | |
539 | ||
540 | KISNEG1: | |
541 | fmoves TWO,%fp0 | |
542 | movel #0x3fff0000,F(%a6) | |
543 | clrl F+8(%a6) | |
544 | fsubx F(%a6),%fp0 | ...2-F | |
545 | movel FFRAC(%a6),%d0 | |
546 | andil #0x7E000000,%d0 | |
547 | asrl #8,%d0 | |
548 | asrl #8,%d0 | |
549 | asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F | |
550 | faddx %fp1,%fp1 | ...GET 2Z | |
551 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 | |
552 | faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z | |
553 | lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F | |
554 | addal %d0,%a0 | |
555 | fmoves negone,%fp1 | ...FP1 IS K = -1 | |
556 | bra LP1CONT1 | |
557 | ||
558 | KISZERO: | |
559 | fmoves one,%fp0 | |
560 | movel #0x3fff0000,F(%a6) | |
561 | clrl F+8(%a6) | |
562 | fsubx F(%a6),%fp0 | ...1-F | |
563 | movel FFRAC(%a6),%d0 | |
564 | andil #0x7E000000,%d0 | |
565 | asrl #8,%d0 | |
566 | asrl #8,%d0 | |
567 | asrl #4,%d0 | |
568 | faddx %fp1,%fp0 | ...FP0 IS Y-F | |
569 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 SAVED | |
570 | lea LOGTBL,%a0 | |
571 | addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F | |
572 | fmoves zero,%fp1 | ...FP1 IS K = 0 | |
573 | bra LP1CONT1 | |
574 | ||
575 | LP1NEG0: | |
576 | |--FPCR SAVED. D0 IS X IN COMPACT FORM. | |
577 | cmpil #0,%d0 | |
578 | blts LP1NEG | |
579 | LP1ZERO: | |
580 | fmoves negone,%fp0 | |
581 | ||
582 | fmovel %d1,%fpcr | |
583 | bra t_dz | |
584 | ||
585 | LP1NEG: | |
586 | fmoves zero,%fp0 | |
587 | ||
588 | fmovel %d1,%fpcr | |
589 | bra t_operr | |
590 | ||
591 | |end |