]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 LT |
2 | /* |
3 | * Routines to emulate some Altivec/VMX instructions, specifically | |
4 | * those that can trap when given denormalized operands in Java mode. | |
5 | */ | |
6 | #include <linux/kernel.h> | |
7 | #include <linux/errno.h> | |
8 | #include <linux/sched.h> | |
9 | #include <asm/ptrace.h> | |
10 | #include <asm/processor.h> | |
7c0f6ba6 | 11 | #include <linux/uaccess.h> |
1da177e4 LT |
12 | |
13 | /* Functions in vector.S */ | |
14 | extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); | |
15 | extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); | |
16 | extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | |
17 | extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | |
18 | extern void vrefp(vector128 *dst, vector128 *src); | |
19 | extern void vrsqrtefp(vector128 *dst, vector128 *src); | |
20 | extern void vexptep(vector128 *dst, vector128 *src); | |
21 | ||
/*
 * 2^(i/8) for i = 0..7 as 24-bit mantissas scaled by 2^23 (entry 0 is
 * 1.0).  Indexed by the top three fraction bits of the power argument.
 */
static const unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Upper 32 bits of the unsigned 64-bit product a * b, i.e. the value the
 * PowerPC mulhwu instruction produces.  Written in C so the compiler can
 * see through it.
 */
static inline unsigned int eexp2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN          -> the same NaN with the quiet bit set
 *   |x| >= 256   -> +Inf for positive x, +0 for negative x
 *   |x| <  2^-23 -> 1.0
 * Results too small for a normalized number are returned denormalized
 * (rounded), or as +0 once they underflow completely.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;		/* exp <= 7, so this stays below 2^31 */
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * This relies on >> of a negative int being an arithmetic shift
	 * (floor), exactly as the original code did.  The bias is 126
	 * rather than 127 because the mantissa added below still carries
	 * its leading 1 in bit 23.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[((unsigned int)pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction.  The
	 * shift is done on the unsigned value so the discarded high bits
	 * do not invoke signed-overflow behaviour.  0x172b83ff / 2^32 is
	 * 2^(1/8) - 1, the relative width of one table step.
	 */
	frac = eexp2_mulhi((unsigned int)pwr << 12, 0x172b83ff);
	frac = eexp2_mulhi(frac, mant);
	mant += frac;

	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift right with round-half-up */
	exp = -exp;
	mant += 1u << (exp - 1);
	return mant >> exp;
}
87 | ||
/*
 * Upper 32 bits of the unsigned 64-bit product a * b, i.e. the value the
 * PowerPC mulhwu instruction produces.
 */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is the single-precision bit pattern of the estimate.
 *
 * Special cases:
 *   NaN      -> the same NaN with the quiet bit set
 *   +/-Inf   -> returned unchanged
 *   +/-0     -> -Inf
 *
 * NOTE(review): the sign bit of a finite non-zero input is ignored, so
 * a negative x yields log_2(|x|) and -Inf is passed through.  Confirm
 * against the architected vlogefp special-case table before relying on
 * this for negative operands.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, lz;
	unsigned int mant, mag;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */

	/*
	 * From here on `exp' holds the running result as a signed fixed
	 * point number with 23 fraction bits, and `mant' the remaining
	 * factor in [1.0, 2.0) scaled by 2^23.
	 */
	if (exp == 0) {
		/* denormalized: mant != 0 here, so clz is well defined */
		lz = __builtin_clz(mant);
		mant <<= lz - 8;
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/* peel off factors of 2^0.5, 2^0.25 and 2^0.125 in turn */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		exp += elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
	}

	if (exp == 0)
		return 0;		/* log_2(1.0) = +0 */

	/* convert the signed fixed-point value to sign + magnitude */
	s = 0;
	mag = exp;
	if (exp < 0) {
		s = 0x80000000;
		mag = 0u - (unsigned int)exp;
	}

	/* normalize so the leading 1 sits in bit 23, then pack as a float */
	lz = 8 - __builtin_clz(mag);
	if (lz > 0)
		mag >>= lz;
	else if (lz < 0)
		mag <<= -lz;
	/* the leading 1 in bit 23 of mag bumps the exponent to lz + 127 */
	return s + ((unsigned int)(lz + 126) << 23) + mag;
}
152 | ||
/* Bit ORed into the VSCR word when a conversion saturates. */
#define VSCR_SAT	1

/*
 * Convert the single-precision bit pattern `x', multiplied by 2^scale,
 * to a signed 32-bit integer, truncating toward zero.
 *
 * NaN converts to 0.  Values whose magnitude is 2^31 or more clamp to
 * INT_MAX / INT_MIN and set VSCR_SAT in *vscrp, except that an exact
 * -2^31 result is returned without flagging saturation.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int biased = (x >> 23) & 0xff;
	int fraction = x & 0x7fffff;
	int power, magnitude;

	if (biased == 255 && fraction != 0)
		return 0;			/* NaN -> 0 */

	power = biased - 127 + scale;
	if (power < 0)
		return 0;			/* |value| < 1: truncates to 0 */

	if (power >= 31) {
		/* out of range, unless this is exactly -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading 1 at bit 30, then shift down to its real weight */
	magnitude = ((fraction | 0x800000) << 7) >> (30 - power);
	return negative ? -magnitude : magnitude;
}

/*
 * Convert the single-precision bit pattern `x', multiplied by 2^scale,
 * to an unsigned 32-bit integer, truncating toward zero.
 *
 * NaN and anything of magnitude below 1 convert to 0 silently.  Other
 * negative values clamp to 0 and values of 2^32 or more clamp to
 * 0xffffffff; both clamps set VSCR_SAT in *vscrp.
 */
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int biased = (x >> 23) & 0xff;
	unsigned int fraction = x & 0x7fffff;
	int power;

	if (biased == 255 && fraction != 0)
		return 0;			/* NaN -> 0 */

	power = biased - 127 + scale;
	if (power < 0)
		return 0;			/* |value| < 1: truncates to 0 */

	if (x & 0x80000000) {
		*vscrp |= VSCR_SAT;		/* negative => saturate to 0 */
		return 0;
	}
	if (power >= 32) {
		*vscrp |= VSCR_SAT;		/* too large => all ones */
		return 0xffffffff;
	}

	/* put the leading 1 at bit 31, then shift down to its real weight */
	return ((fraction | 0x800000) << 8) >> (31 - power);
}
203 | ||
/*
 * Round a single-precision bit pattern to a floating-point integer,
 * truncating toward zero.  NaNs come back quieted; magnitudes below 1.0
 * collapse to a zero carrying the input's sign.
 */
static unsigned int rfiz(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int frac_bits;

	if (e == 128 && (x & 0x7fffff))
		return x | 0x400000;		/* quiet the NaN */
	if (e < 0)
		return x & 0x80000000;		/* only the sign survives */
	if (e >= 23)
		return x;			/* no fraction bits (or Inf) */

	/* clear the mantissa bits that lie below the binary point */
	frac_bits = 0x7fffff >> e;
	return x & ~frac_bits;
}
218 | ||
/*
 * Round a single-precision bit pattern to a floating-point integer,
 * away from zero (toward +Inf for positive, -Inf for negative inputs).
 * NaNs come back quieted; zeros keep their sign.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int frac_bits;

	if (e == 128 && (x & 0x7fffff))
		return x | 0x400000;		/* quiet the NaN */
	if (e >= 23 || (x & 0x7fffffff) == 0)
		return x;			/* integral, Inf, or +/-0 */
	if (e < 0)
		return sign | 0x3f800000;	/* 0 < |x| < 1 -> +/-1.0 */

	/*
	 * Add "just under one" and truncate.  A carry out of the mantissa
	 * simply bumps the exponent, which is the right answer, and can
	 * never reach the sign bit.
	 */
	frac_bits = 0x7fffff >> e;
	return (x + frac_bits) & ~frac_bits;
}
239 | ||
/*
 * Round a single-precision bit pattern to a floating-point integer,
 * to nearest, with ties going to the even integer.
 *
 * NaNs come back quieted; values that are already integral (including
 * Inf) are returned unchanged; the sign of the input is always kept, so
 * small negative values round to -0.
 */
static unsigned int rfin(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	unsigned int frac_mask, unit, half, frac;
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return sign;		/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: pick the even one */
		if ((x & 0x7fffff) == 0)
			return sign;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return sign | 0x3f800000;
	}

	frac_mask = 0x7fffff >> exp;	/* mantissa bits below the point */
	unit = frac_mask + 1;		/* weight of 1.0 in this encoding */
	half = unit >> 1;		/* weight of 0.5 */

	frac = x & frac_mask;
	x &= ~frac_mask;		/* truncate toward zero first */

	/*
	 * Round the magnitude up when the discarded part exceeds one half,
	 * or equals one half and the truncated integer is odd.  For
	 * exp == 0 the `unit' bit is bit 23, the low bit of the exponent
	 * field; that field is 127 there, so the bit is set, matching the
	 * odd integer part 1.  A carry out of the mantissa bumps the
	 * exponent, which is the correct result.
	 */
	if (frac > half || (frac == half && (x & unit)))
		x += unit;
	return x;
}
259 | ||
260 | int emulate_altivec(struct pt_regs *regs) | |
261 | { | |
262 | unsigned int instr, i; | |
263 | unsigned int va, vb, vc, vd; | |
264 | vector128 *vrs; | |
265 | ||
266 | if (get_user(instr, (unsigned int __user *) regs->nip)) | |
267 | return -EFAULT; | |
268 | if ((instr >> 26) != 4) | |
269 | return -EINVAL; /* not an altivec instruction */ | |
270 | vd = (instr >> 21) & 0x1f; | |
271 | va = (instr >> 16) & 0x1f; | |
272 | vb = (instr >> 11) & 0x1f; | |
273 | vc = (instr >> 6) & 0x1f; | |
274 | ||
de79f7b9 | 275 | vrs = current->thread.vr_state.vr; |
1da177e4 LT |
276 | switch (instr & 0x3f) { |
277 | case 10: | |
278 | switch (vc) { | |
279 | case 0: /* vaddfp */ | |
280 | vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); | |
281 | break; | |
282 | case 1: /* vsubfp */ | |
283 | vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); | |
284 | break; | |
285 | case 4: /* vrefp */ | |
286 | vrefp(&vrs[vd], &vrs[vb]); | |
287 | break; | |
288 | case 5: /* vrsqrtefp */ | |
289 | vrsqrtefp(&vrs[vd], &vrs[vb]); | |
290 | break; | |
291 | case 6: /* vexptefp */ | |
292 | for (i = 0; i < 4; ++i) | |
293 | vrs[vd].u[i] = eexp2(vrs[vb].u[i]); | |
294 | break; | |
295 | case 7: /* vlogefp */ | |
296 | for (i = 0; i < 4; ++i) | |
297 | vrs[vd].u[i] = elog2(vrs[vb].u[i]); | |
298 | break; | |
299 | case 8: /* vrfin */ | |
300 | for (i = 0; i < 4; ++i) | |
301 | vrs[vd].u[i] = rfin(vrs[vb].u[i]); | |
302 | break; | |
303 | case 9: /* vrfiz */ | |
304 | for (i = 0; i < 4; ++i) | |
305 | vrs[vd].u[i] = rfiz(vrs[vb].u[i]); | |
306 | break; | |
307 | case 10: /* vrfip */ | |
308 | for (i = 0; i < 4; ++i) { | |
309 | u32 x = vrs[vb].u[i]; | |
310 | x = (x & 0x80000000)? rfiz(x): rfii(x); | |
311 | vrs[vd].u[i] = x; | |
312 | } | |
313 | break; | |
314 | case 11: /* vrfim */ | |
315 | for (i = 0; i < 4; ++i) { | |
316 | u32 x = vrs[vb].u[i]; | |
317 | x = (x & 0x80000000)? rfii(x): rfiz(x); | |
318 | vrs[vd].u[i] = x; | |
319 | } | |
320 | break; | |
321 | case 14: /* vctuxs */ | |
322 | for (i = 0; i < 4; ++i) | |
323 | vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, | |
de79f7b9 | 324 | ¤t->thread.vr_state.vscr.u[3]); |
1da177e4 LT |
325 | break; |
326 | case 15: /* vctsxs */ | |
327 | for (i = 0; i < 4; ++i) | |
328 | vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, | |
de79f7b9 | 329 | ¤t->thread.vr_state.vscr.u[3]); |
1da177e4 LT |
330 | break; |
331 | default: | |
332 | return -EINVAL; | |
333 | } | |
334 | break; | |
335 | case 46: /* vmaddfp */ | |
336 | vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | |
337 | break; | |
338 | case 47: /* vnmsubfp */ | |
339 | vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | |
340 | break; | |
341 | default: | |
342 | return -EINVAL; | |
343 | } | |
344 | ||
345 | return 0; | |
346 | } |