]> git.proxmox.com Git - mirror_edk2.git/blame - StdLib/LibC/Math/k_rem_pio2.c
ArmPkg: ArmLib: purge incorrect ArmDrainWriteBuffer () alias
[mirror_edk2.git] / StdLib / LibC / Math / k_rem_pio2.c
CommitLineData
2aa62f2b 1/* @(#)k_rem_pio2.c 5.1 93/09/24 */\r
2/*\r
3 * ====================================================\r
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.\r
5 *\r
6 * Developed at SunPro, a Sun Microsystems, Inc. business.\r
7 * Permission to use, copy, modify, and distribute this\r
8 * software is freely granted, provided that this notice\r
9 * is preserved.\r
10 * ====================================================\r
11 */\r
12#include <LibConfig.h>\r
13#include <sys/EfiCdefs.h>\r
14#if defined(LIBM_SCCS) && !defined(lint)\r
15__RCSID("$NetBSD: k_rem_pio2.c,v 1.11 2003/01/04 23:43:03 wiz Exp $");\r
16#endif\r
17\r
18/*\r
19 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)\r
20 * double x[],y[]; int e0,nx,prec; int ipio2[];\r
21 *\r
22 * __kernel_rem_pio2 return the last three digits of N with\r
23 * y = x - N*pi/2\r
24 * so that |y| < pi/2.\r
25 *\r
26 * The method is to compute the integer (mod 8) and fraction parts of\r
27 * (2/pi)*x without doing the full multiplication. In general we\r
28 * skip the part of the product that are known to be a huge integer (\r
29 * more accurately, = 0 mod 8 ). Thus the number of operations are\r
30 * independent of the exponent of the input.\r
31 *\r
32 * (2/pi) is represented by an array of 24-bit integers in ipio2[].\r
33 *\r
34 * Input parameters:\r
35 * x[] The input value (must be positive) is broken into nx\r
36 * pieces of 24-bit integers in double precision format.\r
37 * x[i] will be the i-th 24 bit of x. The scaled exponent\r
38 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0\r
39 * match x's up to 24 bits.\r
40 *\r
41 * Example of breaking a double positive z into x[0]+x[1]+x[2]:\r
42 * e0 = ilogb(z)-23\r
43 * z = scalbn(z,-e0)\r
44 * for i = 0,1,2\r
45 * x[i] = floor(z)\r
46 * z = (z-x[i])*2**24\r
47 *\r
48 *\r
49 * y[] output result in an array of double precision numbers.\r
50 * The dimension of y[] is:\r
51 * 24-bit precision 1\r
52 * 53-bit precision 2\r
53 * 64-bit precision 2\r
54 * 113-bit precision 3\r
55 * The actual value is the sum of them. Thus for 113-bit\r
56 * precison, one may have to do something like:\r
57 *\r
58 * long double t,w,r_head, r_tail;\r
59 * t = (long double)y[2] + (long double)y[1];\r
60 * w = (long double)y[0];\r
61 * r_head = t+w;\r
62 * r_tail = w - (r_head - t);\r
63 *\r
64 * e0 The exponent of x[0]\r
65 *\r
66 * nx dimension of x[]\r
67 *\r
68 * prec an integer indicating the precision:\r
69 * 0 24 bits (single)\r
70 * 1 53 bits (double)\r
71 * 2 64 bits (extended)\r
72 * 3 113 bits (quad)\r
73 *\r
74 * ipio2[]\r
75 * integer array, contains the (24*i)-th to (24*i+23)-th\r
76 * bit of 2/pi after binary point. The corresponding\r
77 * floating value is\r
78 *\r
79 * ipio2[i] * 2^(-24(i+1)).\r
80 *\r
81 * External function:\r
82 * double scalbn(), floor();\r
83 *\r
84 *\r
85 * Here is the description of some local variables:\r
86 *\r
87 * jk jk+1 is the initial number of terms of ipio2[] needed\r
88 * in the computation. The recommended value is 2,3,4,\r
89 * 6 for single, double, extended,and quad.\r
90 *\r
91 * jz local integer variable indicating the number of\r
92 * terms of ipio2[] used.\r
93 *\r
94 * jx nx - 1\r
95 *\r
96 * jv index for pointing to the suitable ipio2[] for the\r
97 * computation. In general, we want\r
98 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8\r
99 * is an integer. Thus\r
100 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv\r
101 * Hence jv = max(0,(e0-3)/24).\r
102 *\r
103 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.\r
104 *\r
105 * q[] double array with integral value, representing the\r
106 * 24-bits chunk of the product of x and 2/pi.\r
107 *\r
108 * q0 the corresponding exponent of q[0]. Note that the\r
109 * exponent for q[i] would be q0-24*i.\r
110 *\r
111 * PIo2[] double precision array, obtained by cutting pi/2\r
112 * into 24 bits chunks.\r
113 *\r
114 * f[] ipio2[] in floating point\r
115 *\r
116 * iq[] integer array by breaking up q[] in 24-bits chunk.\r
117 *\r
118 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]\r
119 *\r
120 * ih integer. If >0 it indicates q[] is >= 0.5, hence\r
121 * it also indicates the *sign* of the result.\r
122 *\r
123 */\r
124\r
125\r
126/*\r
127 * Constants:\r
128 * The hexadecimal values are the intended ones for the following\r
129 * constants. The decimal values may be used, provided that the\r
130 * compiler will convert from decimal to binary accurately enough\r
131 * to produce the hexadecimal values shown.\r
132 */\r
133\r
134#include "math.h"\r
135#include "math_private.h"\r
136\r
137static const int init_jk[] = {2,3,4,6}; /* initial value for jk */\r
138\r
139static const double PIo2[] = {\r
140 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */\r
141 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */\r
142 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */\r
143 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */\r
144 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */\r
145 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */\r
146 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */\r
147 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */\r
148};\r
149\r
150static const double\r
151zero = 0.0,\r
152one = 1.0,\r
153two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */\r
154twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */\r
155\r
156int\r
157__kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2)\r
158{\r
159 int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;\r
160 double z,fw,f[20],fq[20],q[20];\r
161\r
162 /* initialize jk*/\r
163 jk = init_jk[prec];\r
164 jp = jk;\r
165\r
166 /* determine jx,jv,q0, note that 3>q0 */\r
167 jx = nx-1;\r
168 jv = (e0-3)/24; if(jv<0) jv=0;\r
169 q0 = e0-24*(jv+1);\r
170\r
171 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */\r
172 j = jv-jx; m = jx+jk;\r
173 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];\r
174\r
175 /* compute q[0],q[1],...q[jk] */\r
176 for (i=0;i<=jk;i++) {\r
177 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;\r
178 }\r
179\r
180 jz = jk;\r
181recompute:\r
182 /* distill q[] into iq[] reversingly */\r
183 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {\r
184 fw = (double)((int32_t)(twon24* z));\r
185 iq[i] = (int32_t)(z-two24*fw);\r
186 z = q[j-1]+fw;\r
187 }\r
188\r
189 /* compute n */\r
190 z = scalbn(z,q0); /* actual value of z */\r
191 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */\r
192 n = (int32_t) z;\r
193 z -= (double)n;\r
194 ih = 0;\r
195 if(q0>0) { /* need iq[jz-1] to determine n */\r
196 i = (iq[jz-1]>>(24-q0)); n += i;\r
197 iq[jz-1] -= i<<(24-q0);\r
198 ih = iq[jz-1]>>(23-q0);\r
199 }\r
200 else if(q0==0) ih = iq[jz-1]>>23;\r
201 else if(z>=0.5) ih=2;\r
202\r
203 if(ih>0) { /* q > 0.5 */\r
204 n += 1; carry = 0;\r
205 for(i=0;i<jz ;i++) { /* compute 1-q */\r
206 j = iq[i];\r
207 if(carry==0) {\r
208 if(j!=0) {\r
209 carry = 1; iq[i] = 0x1000000- j;\r
210 }\r
211 } else iq[i] = 0xffffff - j;\r
212 }\r
213 if(q0>0) { /* rare case: chance is 1 in 12 */\r
214 switch(q0) {\r
215 case 1:\r
216 iq[jz-1] &= 0x7fffff; break;\r
217 case 2:\r
218 iq[jz-1] &= 0x3fffff; break;\r
219 }\r
220 }\r
221 if(ih==2) {\r
222 z = one - z;\r
223 if(carry!=0) z -= scalbn(one,q0);\r
224 }\r
225 }\r
226\r
227 /* check if recomputation is needed */\r
228 if(z==zero) {\r
229 j = 0;\r
230 for (i=jz-1;i>=jk;i--) j |= iq[i];\r
231 if(j==0) { /* need recomputation */\r
232 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */\r
233\r
234 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */\r
235 f[jx+i] = (double) ipio2[jv+i];\r
236 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];\r
237 q[i] = fw;\r
238 }\r
239 jz += k;\r
240 goto recompute;\r
241 }\r
242 }\r
243\r
244 /* chop off zero terms */\r
245 if(z==0.0) {\r
246 jz -= 1; q0 -= 24;\r
247 while(iq[jz]==0) { jz--; q0-=24;}\r
248 } else { /* break z into 24-bit if necessary */\r
249 z = scalbn(z,-q0);\r
250 if(z>=two24) {\r
251 fw = (double)((int32_t)(twon24*z));\r
252 iq[jz] = (int32_t)(z-two24*fw);\r
253 jz += 1; q0 += 24;\r
254 iq[jz] = (int32_t) fw;\r
255 } else iq[jz] = (int32_t) z ;\r
256 }\r
257\r
258 /* convert integer "bit" chunk to floating-point value */\r
259 fw = scalbn(one,q0);\r
260 for(i=jz;i>=0;i--) {\r
261 q[i] = fw*(double)iq[i]; fw*=twon24;\r
262 }\r
263\r
264 /* compute PIo2[0,...,jp]*q[jz,...,0] */\r
265 for(i=jz;i>=0;i--) {\r
266 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];\r
267 fq[jz-i] = fw;\r
268 }\r
269\r
270 /* compress fq[] into y[] */\r
271 switch(prec) {\r
272 case 0:\r
273 fw = 0.0;\r
274 for (i=jz;i>=0;i--) fw += fq[i];\r
275 y[0] = (ih==0)? fw: -fw;\r
276 break;\r
277 case 1:\r
278 case 2:\r
279 fw = 0.0;\r
280 for (i=jz;i>=0;i--) fw += fq[i];\r
281 y[0] = (ih==0)? fw: -fw;\r
282 fw = fq[0]-fw;\r
283 for (i=1;i<=jz;i++) fw += fq[i];\r
284 y[1] = (ih==0)? fw: -fw;\r
285 break;\r
286 case 3: /* painful */\r
287 for (i=jz;i>0;i--) {\r
288 fw = fq[i-1]+fq[i];\r
289 fq[i] += fq[i-1]-fw;\r
290 fq[i-1] = fw;\r
291 }\r
292 for (i=jz;i>1;i--) {\r
293 fw = fq[i-1]+fq[i];\r
294 fq[i] += fq[i-1]-fw;\r
295 fq[i-1] = fw;\r
296 }\r
297 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];\r
298 if(ih==0) {\r
299 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;\r
300 } else {\r
301 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;\r
302 }\r
303 }\r
304 return n&7;\r
305}\r