]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 LT |
2 | /* |
3 | * Routines to emulate some Altivec/VMX instructions, specifically | |
4 | * those that can trap when given denormalized operands in Java mode. | |
5 | */ | |
6 | #include <linux/kernel.h> | |
7 | #include <linux/errno.h> | |
8 | #include <linux/sched.h> | |
9 | #include <asm/ptrace.h> | |
10 | #include <asm/processor.h> | |
d647b210 | 11 | #include <asm/switch_to.h> |
7c0f6ba6 | 12 | #include <linux/uaccess.h> |
1da177e4 LT |
13 | |
14 | /* Functions in vector.S */ | |
15 | extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); | |
16 | extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); | |
17 | extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | |
18 | extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | |
19 | extern void vrefp(vector128 *dst, vector128 *src); | |
20 | extern void vrsqrtefp(vector128 *dst, vector128 *src); | |
21 | extern void vexptep(vector128 *dst, vector128 *src); | |
22 | ||
/*
 * Mantissas of 2^(i/8) for i = 0..7, scaled by 2^23 (so entry 0 is 1.0
 * with the implicit leading one at bit 23).  Read-only lookup table used
 * by eexp2(), indexed by the top three fraction bits of the exponent.
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};
33 | ||
34 | /* | |
35 | * Computes an estimate of 2^x. The `s' argument is the 32-bit | |
36 | * single-precision floating-point representation of x. | |
37 | */ | |
38 | static unsigned int eexp2(unsigned int s) | |
39 | { | |
40 | int exp, pwr; | |
41 | unsigned int mant, frac; | |
42 | ||
43 | /* extract exponent field from input */ | |
44 | exp = ((s >> 23) & 0xff) - 127; | |
45 | if (exp > 7) { | |
46 | /* check for NaN input */ | |
47 | if (exp == 128 && (s & 0x7fffff) != 0) | |
48 | return s | 0x400000; /* return QNaN */ | |
49 | /* 2^-big = 0, 2^+big = +Inf */ | |
50 | return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ | |
51 | } | |
52 | if (exp < -23) | |
53 | return 0x3f800000; /* 1.0 */ | |
54 | ||
55 | /* convert to fixed point integer in 9.23 representation */ | |
56 | pwr = (s & 0x7fffff) | 0x800000; | |
57 | if (exp > 0) | |
58 | pwr <<= exp; | |
59 | else | |
60 | pwr >>= -exp; | |
61 | if (s & 0x80000000) | |
62 | pwr = -pwr; | |
63 | ||
64 | /* extract integer part, which becomes exponent part of result */ | |
65 | exp = (pwr >> 23) + 126; | |
66 | if (exp >= 254) | |
67 | return 0x7f800000; | |
68 | if (exp < -23) | |
69 | return 0; | |
70 | ||
71 | /* table lookup on top 3 bits of fraction to get mantissa */ | |
72 | mant = exp2s[(pwr >> 20) & 7]; | |
73 | ||
74 | /* linear interpolation using remaining 20 bits of fraction */ | |
75 | asm("mulhwu %0,%1,%2" : "=r" (frac) | |
76 | : "r" (pwr << 12), "r" (0x172b83ff)); | |
77 | asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); | |
78 | mant += frac; | |
79 | ||
80 | if (exp >= 0) | |
81 | return mant + (exp << 23); | |
82 | ||
83 | /* denormalized result */ | |
84 | exp = -exp; | |
85 | mant += 1 << (exp - 1); | |
86 | return mant >> exp; | |
87 | } | |
88 | ||
/*
 * High 32 bits of the unsigned 32x32 -> 64 bit product, i.e. the value
 * the PowerPC mulhwu instruction produces.
 */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Number of leading zero bits in a 32-bit word; 32 for a zero input,
 * matching the PowerPC cntlzw instruction.
 */
static inline int elog2_clz(unsigned int v)
{
	return v ? __builtin_clz(v) : 32;
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN        -> the same NaN with the quiet bit set
 *   +Inf       -> +Inf
 *   +0 or -0   -> -Inf
 *   x < 0      -> QNaN (0x7fc00000), including -Inf
 */
static unsigned int elog2(unsigned int s)
{
	unsigned int mant, mag, sign;
	int exp, lz;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {		/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* log2(+Inf) = +Inf, log2(-Inf) is invalid */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if (exp == 0 && mant == 0)		/* +0 or -0 */
		return 0xff800000;		/* return -Inf */
	if (s & 0x80000000)
		return 0x7fc00000;		/* log2 of a negative number */

	/*
	 * From here on `exp' holds the result so far as a signed 9.23
	 * fixed-point number and `mant' the remaining factor in [1, 2)
	 * scaled by 2^23.
	 */
	if (exp == 0) {
		/* denormalized: normalize so the leading one is at bit 23 */
		lz = elog2_clz(mant);
		mant <<= lz - 8;
		/* multiply rather than shift: the value is negative */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off factors of 2^0.5, 2^0.25 and 2^0.125.  The low 23 bits
	 * of exp are clear on entry, so each addition sets a distinct bit.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp += 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp += 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp += 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		exp += (int)elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
	}

	/* convert the signed fixed-point value to sign/exponent/mantissa */
	if (exp == 0)
		return 0;
	sign = exp < 0 ? 0x80000000 : 0;
	mag = exp < 0 ? -(unsigned int)exp : (unsigned int)exp;

	/* move the leading one to bit 23; lz is the unbiased exponent */
	lz = 8 - elog2_clz(mag);
	if (lz > 0)
		mag >>= lz;
	else if (lz < 0)
		mag <<= -lz;

	/* bias of 126: the leading one in mag supplies the final +1 */
	return sign + ((unsigned int)(lz + 126) << 23) + mag;
}
153 | ||
#define VSCR_SAT	1

/*
 * Convert the single-precision value with bit pattern `x', multiplied by
 * 2^scale, to a signed 32-bit integer, truncating towards zero.
 *
 * NaNs convert to 0.  Results that do not fit are clamped to the most
 * negative / most positive 32-bit value and VSCR_SAT is set in *vscrp;
 * an input that scales to exactly -2^31 is returned without setting it.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const int negative = (x & 0x80000000) != 0;
	int biased = (x >> 23) & 0xff;
	int fraction = x & 0x7fffff;
	int power;
	int magnitude;

	/* NaN -> 0 */
	if (biased == 255 && fraction != 0)
		return 0;

	/* unbiased exponent of the scaled value */
	power = biased - 127 + scale;

	/* magnitude below 1.0: round towards zero */
	if (power < 0)
		return 0;

	if (power >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading one at bit 30, then shift down to the units place */
	magnitude = ((fraction | 0x800000) << 7) >> (30 - power);

	return negative ? -magnitude : magnitude;
}
177 | ||
178 | static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) | |
179 | { | |
180 | int exp; | |
181 | unsigned int mant; | |
182 | ||
183 | exp = (x >> 23) & 0xff; | |
184 | mant = x & 0x7fffff; | |
185 | if (exp == 255 && mant != 0) | |
186 | return 0; /* NaN -> 0 */ | |
187 | exp = exp - 127 + scale; | |
188 | if (exp < 0) | |
189 | return 0; /* round towards zero */ | |
190 | if (x & 0x80000000) { | |
191 | /* negative => saturate to 0 */ | |
192 | *vscrp |= VSCR_SAT; | |
193 | return 0; | |
194 | } | |
195 | if (exp >= 32) { | |
196 | /* saturate */ | |
197 | *vscrp |= VSCR_SAT; | |
198 | return 0xffffffff; | |
199 | } | |
200 | mant |= 0x800000; | |
201 | mant = (mant << 8) >> (31 - exp); | |
202 | return mant; | |
203 | } | |
204 | ||
/*
 * Round to floating integer, towards 0.
 *
 * Takes and returns single-precision bit patterns.  NaNs are returned
 * with the quiet bit set; the sign of the input is always preserved.
 */
static unsigned int rfiz(unsigned int x)
{
	const int power = (int)((x >> 23) & 0xff) - 127;

	/* |x| < 1.0 rounds to 0, keeping the sign */
	if (power < 0)
		return x & 0x80000000;

	/* clear the mantissa bits that lie below the units place */
	if (power < 23)
		return x & ~(0x7fffffu >> power);

	/* NaN -> make it a QNaN */
	if (power == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* it's an integer already (or Inf) */
	return x;
}
219 | ||
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 *
 * Takes and returns single-precision bit patterns.  NaNs are returned
 * with the quiet bit set; zeros, infinities and values that are already
 * integral are returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int power = (int)((x >> 23) & 0xff) - 127;
	int below_units;

	/* NaN -> make it a QNaN */
	if (power == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* already an integer (or Inf), or +/-0 */
	if (power >= 23 || (x & 0x7fffffff) == 0)
		return x;

	/* 0 < |x| < 1.0 rounds to +/- 1.0 */
	if (power < 0)
		return sign | 0x3f800000;

	/* mantissa bits that lie below the units place */
	below_units = 0x7fffff >> power;

	/*
	 * Bump any non-zero fraction up to the next integer, then drop the
	 * fraction.  A carry out of the mantissa lands in the exponent,
	 * which is fine; it cannot reach the sign bit.
	 */
	return (x + below_units) & ~below_units;
}
240 | ||
/*
 * Round to floating integer, to nearest, with exact ties going to the
 * even integer (IEEE round-to-nearest).
 *
 * Takes and returns single-precision bit patterns.  NaNs are returned
 * with the quiet bit set; the sign of the input is always preserved.
 */
static unsigned int rfin(unsigned int x)
{
	unsigned int frac_mask, unit, half, frac, res;
	int exp;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: pick the even 0 */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffffu >> exp;	/* mantissa bits below the units place */
	unit = frac_mask + 1;		/* weight of 1.0 in the mantissa */
	half = unit >> 1;		/* weight of 0.5 in the mantissa */
	frac = x & frac_mask;
	res = x & ~frac_mask;		/* magnitude truncated to an integer */

	/*
	 * Round the magnitude up when the fraction is above one half, or
	 * exactly one half with an odd integer part.  For exp == 0 the
	 * integer part is the implicit leading one, which is odd.  A carry
	 * out of the mantissa lands in the exponent, which is the correct
	 * encoding of the next power of two.
	 */
	if (frac > half || (frac == half && (exp == 0 || (res & unit))))
		res += unit;
	return res;
}
260 | ||
261 | int emulate_altivec(struct pt_regs *regs) | |
262 | { | |
263 | unsigned int instr, i; | |
264 | unsigned int va, vb, vc, vd; | |
265 | vector128 *vrs; | |
266 | ||
267 | if (get_user(instr, (unsigned int __user *) regs->nip)) | |
268 | return -EFAULT; | |
269 | if ((instr >> 26) != 4) | |
270 | return -EINVAL; /* not an altivec instruction */ | |
271 | vd = (instr >> 21) & 0x1f; | |
272 | va = (instr >> 16) & 0x1f; | |
273 | vb = (instr >> 11) & 0x1f; | |
274 | vc = (instr >> 6) & 0x1f; | |
275 | ||
de79f7b9 | 276 | vrs = current->thread.vr_state.vr; |
1da177e4 LT |
277 | switch (instr & 0x3f) { |
278 | case 10: | |
279 | switch (vc) { | |
280 | case 0: /* vaddfp */ | |
281 | vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); | |
282 | break; | |
283 | case 1: /* vsubfp */ | |
284 | vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); | |
285 | break; | |
286 | case 4: /* vrefp */ | |
287 | vrefp(&vrs[vd], &vrs[vb]); | |
288 | break; | |
289 | case 5: /* vrsqrtefp */ | |
290 | vrsqrtefp(&vrs[vd], &vrs[vb]); | |
291 | break; | |
292 | case 6: /* vexptefp */ | |
293 | for (i = 0; i < 4; ++i) | |
294 | vrs[vd].u[i] = eexp2(vrs[vb].u[i]); | |
295 | break; | |
296 | case 7: /* vlogefp */ | |
297 | for (i = 0; i < 4; ++i) | |
298 | vrs[vd].u[i] = elog2(vrs[vb].u[i]); | |
299 | break; | |
300 | case 8: /* vrfin */ | |
301 | for (i = 0; i < 4; ++i) | |
302 | vrs[vd].u[i] = rfin(vrs[vb].u[i]); | |
303 | break; | |
304 | case 9: /* vrfiz */ | |
305 | for (i = 0; i < 4; ++i) | |
306 | vrs[vd].u[i] = rfiz(vrs[vb].u[i]); | |
307 | break; | |
308 | case 10: /* vrfip */ | |
309 | for (i = 0; i < 4; ++i) { | |
310 | u32 x = vrs[vb].u[i]; | |
311 | x = (x & 0x80000000)? rfiz(x): rfii(x); | |
312 | vrs[vd].u[i] = x; | |
313 | } | |
314 | break; | |
315 | case 11: /* vrfim */ | |
316 | for (i = 0; i < 4; ++i) { | |
317 | u32 x = vrs[vb].u[i]; | |
318 | x = (x & 0x80000000)? rfii(x): rfiz(x); | |
319 | vrs[vd].u[i] = x; | |
320 | } | |
321 | break; | |
322 | case 14: /* vctuxs */ | |
323 | for (i = 0; i < 4; ++i) | |
324 | vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, | |
de79f7b9 | 325 | ¤t->thread.vr_state.vscr.u[3]); |
1da177e4 LT |
326 | break; |
327 | case 15: /* vctsxs */ | |
328 | for (i = 0; i < 4; ++i) | |
329 | vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, | |
de79f7b9 | 330 | ¤t->thread.vr_state.vscr.u[3]); |
1da177e4 LT |
331 | break; |
332 | default: | |
333 | return -EINVAL; | |
334 | } | |
335 | break; | |
336 | case 46: /* vmaddfp */ | |
337 | vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | |
338 | break; | |
339 | case 47: /* vnmsubfp */ | |
340 | vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | |
341 | break; | |
342 | default: | |
343 | return -EINVAL; | |
344 | } | |
345 | ||
346 | return 0; | |
347 | } |