// SPDX-License-Identifier: GPL-2.0
/* visemul.c: Emulation of VIS instructions.
 *
 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
 */
6 | #include <linux/kernel.h> | |
7 | #include <linux/errno.h> | |
8 | #include <linux/thread_info.h> | |
121dd5f2 | 9 | #include <linux/perf_event.h> |
0c51ed93 DM |
10 | |
11 | #include <asm/ptrace.h> | |
12 | #include <asm/pstate.h> | |
0c51ed93 | 13 | #include <asm/fpumacro.h> |
7c0f6ba6 | 14 | #include <linux/uaccess.h> |
d550bbd4 | 15 | #include <asm/cacheflush.h> |
0c51ed93 DM |
16 | |
/* OPF field of various VIS instructions.  Each comment gives the 9-bit
 * OPF encoding in binary followed by a short description.
 */

/* 000111011 - four 16-bit packs  */
#define FPACK16_OPF	0x03b

/* 000111010 - two 32-bit packs  */
#define FPACK32_OPF	0x03a

/* 000111101 - two 32-bit to 16-bit fixed packs  */
#define FPACKFIX_OPF	0x03d

/* 001001101 - four 16-bit expands  */
#define FEXPAND_OPF	0x04d

/* 001001011 - two 32-bit merges */
#define FPMERGE_OPF	0x04b

/* 000110001 - 8-by-16-bit partitioned product  */
#define FMUL8x16_OPF	0x031

/* 000110011 - 8-by-16-bit upper alpha partitioned product  */
#define FMUL8x16AU_OPF	0x033

/* 000110101 - 8-by-16-bit lower alpha partitioned product  */
#define FMUL8x16AL_OPF	0x035

/* 000110110 - upper 8-by-16-bit partitioned product  */
#define FMUL8SUx16_OPF	0x036

/* 000110111 - lower 8-by-16-bit partitioned product  */
#define FMUL8ULx16_OPF	0x037

/* 000111000 - upper 8-by-16-bit partitioned product  */
#define FMULD8SUx16_OPF	0x038

/* 000111001 - lower unsigned 8-by-16-bit partitioned product  */
#define FMULD8ULx16_OPF	0x039

/* 000101000 - four 16-bit compare; set rd if src1 > src2  */
#define FCMPGT16_OPF	0x028

/* 000101100 - two 32-bit compare; set rd if src1 > src2  */
#define FCMPGT32_OPF	0x02c

/* 000100000 - four 16-bit compare; set rd if src1 <= src2  */
#define FCMPLE16_OPF	0x020

/* 000100100 - two 32-bit compare; set rd if src1 <= src2  */
#define FCMPLE32_OPF	0x024

/* 000100010 - four 16-bit compare; set rd if src1 != src2  */
#define FCMPNE16_OPF	0x022

/* 000100110 - two 32-bit compare; set rd if src1 != src2  */
#define FCMPNE32_OPF	0x026

/* 000101010 - four 16-bit compare; set rd if src1 == src2  */
#define FCMPEQ16_OPF	0x02a

/* 000101110 - two 32-bit compare; set rd if src1 == src2  */
#define FCMPEQ32_OPF	0x02e

/* 000000000 - Eight 8-bit edge boundary processing  */
#define EDGE8_OPF	0x000

/* 000000001 - Eight 8-bit edge boundary processing, no CC */
#define EDGE8N_OPF	0x001

/* 000000010 - Eight 8-bit edge boundary processing, little-endian  */
#define EDGE8L_OPF	0x002

/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC  */
#define EDGE8LN_OPF	0x003

/* 000000100 - Four 16-bit edge boundary processing  */
#define EDGE16_OPF	0x004

/* 000000101 - Four 16-bit edge boundary processing, no CC  */
#define EDGE16N_OPF	0x005

/* 000000110 - Four 16-bit edge boundary processing, little-endian  */
#define EDGE16L_OPF	0x006

/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC  */
#define EDGE16LN_OPF	0x007

/* 000001000 - Two 32-bit edge boundary processing  */
#define EDGE32_OPF	0x008

/* 000001001 - Two 32-bit edge boundary processing, no CC  */
#define EDGE32N_OPF	0x009

/* 000001010 - Two 32-bit edge boundary processing, little-endian  */
#define EDGE32L_OPF	0x00a

/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC  */
#define EDGE32LN_OPF	0x00b

/* 000111110 - distance between 8 8-bit components  */
#define PDIST_OPF	0x03e

/* 000010000 - convert 8-bit 3-D address to blocked byte address  */
#define ARRAY8_OPF	0x010

/* 000010010 - convert 16-bit 3-D address to blocked byte address  */
#define ARRAY16_OPF	0x012

/* 000010100 - convert 32-bit 3-D address to blocked byte address  */
#define ARRAY32_OPF	0x014

/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE  */
#define BMASK_OPF	0x019

/* 001001100 - Permute bytes as specified by GSR.MASK  */
#define BSHUFFLE_OPF	0x04c

/* The OPF field is 9 bits wide and starts at bit 5 of the instruction. */
#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)

/* 5-bit register number fields of the instruction word. */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	(((INSN) >>  0) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
139 | ||
/* Flush the register windows when any operand is a windowed register.
 *
 * Register numbers 16 and above are not held in pt_regs; fetch_reg() and
 * store_reg() access them through the reg_window located via the frame
 * pointer.  When @rs1, @rs2 or @rd is such a register, the windows are
 * flushed first: with the "flushw" instruction if @from_kernel is
 * non-zero, otherwise through flushw_user().
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
		if (from_kernel != 0)
			__asm__ __volatile__("flushw");
		else
			flushw_user();
	}
}
150 | ||
151 | static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) | |
152 | { | |
517ffce4 | 153 | unsigned long value, fp; |
0c51ed93 DM |
154 | |
155 | if (reg < 16) | |
156 | return (!reg ? 0 : regs->u_regs[reg]); | |
517ffce4 DM |
157 | |
158 | fp = regs->u_regs[UREG_FP]; | |
159 | ||
0c51ed93 DM |
160 | if (regs->tstate & TSTATE_PRIV) { |
161 | struct reg_window *win; | |
517ffce4 | 162 | win = (struct reg_window *)(fp + STACK_BIAS); |
0c51ed93 | 163 | value = win->locals[reg - 16]; |
517ffce4 | 164 | } else if (!test_thread_64bit_stack(fp)) { |
0c51ed93 | 165 | struct reg_window32 __user *win32; |
517ffce4 | 166 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); |
0c51ed93 DM |
167 | get_user(value, &win32->locals[reg - 16]); |
168 | } else { | |
169 | struct reg_window __user *win; | |
517ffce4 | 170 | win = (struct reg_window __user *)(fp + STACK_BIAS); |
0c51ed93 DM |
171 | get_user(value, &win->locals[reg - 16]); |
172 | } | |
173 | return value; | |
174 | } | |
175 | ||
176 | static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, | |
177 | struct pt_regs *regs) | |
178 | { | |
517ffce4 DM |
179 | unsigned long fp = regs->u_regs[UREG_FP]; |
180 | ||
0c51ed93 DM |
181 | BUG_ON(reg < 16); |
182 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
183 | ||
517ffce4 | 184 | if (!test_thread_64bit_stack(fp)) { |
0c51ed93 | 185 | struct reg_window32 __user *win32; |
517ffce4 | 186 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); |
0c51ed93 DM |
187 | return (unsigned long __user *)&win32->locals[reg - 16]; |
188 | } else { | |
189 | struct reg_window __user *win; | |
517ffce4 | 190 | win = (struct reg_window __user *)(fp + STACK_BIAS); |
0c51ed93 DM |
191 | return &win->locals[reg - 16]; |
192 | } | |
193 | } | |
194 | ||
195 | static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, | |
196 | struct pt_regs *regs) | |
197 | { | |
198 | BUG_ON(reg >= 16); | |
199 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
200 | ||
201 | return ®s->u_regs[reg]; | |
202 | } | |
203 | ||
204 | static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) | |
205 | { | |
206 | if (rd < 16) { | |
207 | unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); | |
208 | ||
209 | *rd_kern = val; | |
210 | } else { | |
211 | unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); | |
212 | ||
517ffce4 | 213 | if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) |
0c51ed93 DM |
214 | __put_user((u32)val, (u32 __user *)rd_user); |
215 | else | |
216 | __put_user(val, rd_user); | |
217 | } | |
218 | } | |
219 | ||
220 | static inline unsigned long fpd_regval(struct fpustate *f, | |
221 | unsigned int insn_regnum) | |
222 | { | |
223 | insn_regnum = (((insn_regnum & 1) << 5) | | |
224 | (insn_regnum & 0x1e)); | |
225 | ||
226 | return *(unsigned long *) &f->regs[insn_regnum]; | |
227 | } | |
228 | ||
229 | static inline unsigned long *fpd_regaddr(struct fpustate *f, | |
230 | unsigned int insn_regnum) | |
231 | { | |
232 | insn_regnum = (((insn_regnum & 1) << 5) | | |
233 | (insn_regnum & 0x1e)); | |
234 | ||
235 | return (unsigned long *) &f->regs[insn_regnum]; | |
236 | } | |
237 | ||
238 | static inline unsigned int fps_regval(struct fpustate *f, | |
239 | unsigned int insn_regnum) | |
240 | { | |
241 | return f->regs[insn_regnum]; | |
242 | } | |
243 | ||
244 | static inline unsigned int *fps_regaddr(struct fpustate *f, | |
245 | unsigned int insn_regnum) | |
246 | { | |
247 | return &f->regs[insn_regnum]; | |
248 | } | |
249 | ||
250 | struct edge_tab { | |
251 | u16 left, right; | |
252 | }; | |
7e0b1e61 | 253 | static struct edge_tab edge8_tab[8] = { |
0c51ed93 DM |
254 | { 0xff, 0x80 }, |
255 | { 0x7f, 0xc0 }, | |
256 | { 0x3f, 0xe0 }, | |
257 | { 0x1f, 0xf0 }, | |
258 | { 0x0f, 0xf8 }, | |
259 | { 0x07, 0xfc }, | |
260 | { 0x03, 0xfe }, | |
261 | { 0x01, 0xff }, | |
262 | }; | |
7e0b1e61 | 263 | static struct edge_tab edge8_tab_l[8] = { |
0c51ed93 DM |
264 | { 0xff, 0x01 }, |
265 | { 0xfe, 0x03 }, | |
266 | { 0xfc, 0x07 }, | |
267 | { 0xf8, 0x0f }, | |
268 | { 0xf0, 0x1f }, | |
269 | { 0xe0, 0x3f }, | |
270 | { 0xc0, 0x7f }, | |
271 | { 0x80, 0xff }, | |
272 | }; | |
7e0b1e61 | 273 | static struct edge_tab edge16_tab[4] = { |
0c51ed93 DM |
274 | { 0xf, 0x8 }, |
275 | { 0x7, 0xc }, | |
276 | { 0x3, 0xe }, | |
277 | { 0x1, 0xf }, | |
278 | }; | |
7e0b1e61 | 279 | static struct edge_tab edge16_tab_l[4] = { |
0c51ed93 DM |
280 | { 0xf, 0x1 }, |
281 | { 0xe, 0x3 }, | |
282 | { 0xc, 0x7 }, | |
283 | { 0x8, 0xf }, | |
284 | }; | |
7e0b1e61 | 285 | static struct edge_tab edge32_tab[2] = { |
0c51ed93 DM |
286 | { 0x3, 0x2 }, |
287 | { 0x1, 0x3 }, | |
288 | }; | |
7e0b1e61 | 289 | static struct edge_tab edge32_tab_l[2] = { |
0c51ed93 DM |
290 | { 0x3, 0x1 }, |
291 | { 0x2, 0x3 }, | |
292 | }; | |
293 | ||
294 | static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
295 | { | |
296 | unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; | |
297 | u16 left, right; | |
298 | ||
299 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
300 | orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); | |
301 | orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); | |
302 | ||
303 | if (test_thread_flag(TIF_32BIT)) { | |
304 | rs1 = rs1 & 0xffffffff; | |
305 | rs2 = rs2 & 0xffffffff; | |
306 | } | |
307 | switch (opf) { | |
308 | default: | |
309 | case EDGE8_OPF: | |
310 | case EDGE8N_OPF: | |
311 | left = edge8_tab[rs1 & 0x7].left; | |
312 | right = edge8_tab[rs2 & 0x7].right; | |
313 | break; | |
314 | case EDGE8L_OPF: | |
315 | case EDGE8LN_OPF: | |
316 | left = edge8_tab_l[rs1 & 0x7].left; | |
317 | right = edge8_tab_l[rs2 & 0x7].right; | |
318 | break; | |
319 | ||
320 | case EDGE16_OPF: | |
321 | case EDGE16N_OPF: | |
322 | left = edge16_tab[(rs1 >> 1) & 0x3].left; | |
323 | right = edge16_tab[(rs2 >> 1) & 0x3].right; | |
324 | break; | |
325 | ||
326 | case EDGE16L_OPF: | |
327 | case EDGE16LN_OPF: | |
328 | left = edge16_tab_l[(rs1 >> 1) & 0x3].left; | |
329 | right = edge16_tab_l[(rs2 >> 1) & 0x3].right; | |
330 | break; | |
331 | ||
332 | case EDGE32_OPF: | |
333 | case EDGE32N_OPF: | |
334 | left = edge32_tab[(rs1 >> 2) & 0x1].left; | |
335 | right = edge32_tab[(rs2 >> 2) & 0x1].right; | |
336 | break; | |
337 | ||
338 | case EDGE32L_OPF: | |
339 | case EDGE32LN_OPF: | |
340 | left = edge32_tab_l[(rs1 >> 2) & 0x1].left; | |
341 | right = edge32_tab_l[(rs2 >> 2) & 0x1].right; | |
342 | break; | |
6cb79b3f | 343 | } |
0c51ed93 DM |
344 | |
345 | if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) | |
346 | rd_val = right & left; | |
347 | else | |
348 | rd_val = left; | |
349 | ||
350 | store_reg(regs, rd_val, RD(insn)); | |
351 | ||
352 | switch (opf) { | |
353 | case EDGE8_OPF: | |
354 | case EDGE8L_OPF: | |
355 | case EDGE16_OPF: | |
356 | case EDGE16L_OPF: | |
357 | case EDGE32_OPF: | |
358 | case EDGE32L_OPF: { | |
359 | unsigned long ccr, tstate; | |
360 | ||
361 | __asm__ __volatile__("subcc %1, %2, %%g0\n\t" | |
362 | "rd %%ccr, %0" | |
363 | : "=r" (ccr) | |
364 | : "r" (orig_rs1), "r" (orig_rs2) | |
365 | : "cc"); | |
366 | tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); | |
367 | regs->tstate = tstate | (ccr << 32UL); | |
368 | } | |
6cb79b3f | 369 | } |
0c51ed93 DM |
370 | } |
371 | ||
372 | static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
373 | { | |
374 | unsigned long rs1, rs2, rd_val; | |
375 | unsigned int bits, bits_mask; | |
376 | ||
377 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
378 | rs1 = fetch_reg(RS1(insn), regs); | |
379 | rs2 = fetch_reg(RS2(insn), regs); | |
380 | ||
381 | bits = (rs2 > 5 ? 5 : rs2); | |
382 | bits_mask = (1UL << bits) - 1UL; | |
383 | ||
384 | rd_val = ((((rs1 >> 11) & 0x3) << 0) | | |
385 | (((rs1 >> 33) & 0x3) << 2) | | |
386 | (((rs1 >> 55) & 0x1) << 4) | | |
387 | (((rs1 >> 13) & 0xf) << 5) | | |
388 | (((rs1 >> 35) & 0xf) << 9) | | |
389 | (((rs1 >> 56) & 0xf) << 13) | | |
390 | (((rs1 >> 17) & bits_mask) << 17) | | |
391 | (((rs1 >> 39) & bits_mask) << (17 + bits)) | | |
392 | (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); | |
393 | ||
394 | switch (opf) { | |
395 | case ARRAY16_OPF: | |
396 | rd_val <<= 1; | |
397 | break; | |
398 | ||
399 | case ARRAY32_OPF: | |
400 | rd_val <<= 2; | |
6cb79b3f | 401 | } |
0c51ed93 DM |
402 | |
403 | store_reg(regs, rd_val, RD(insn)); | |
404 | } | |
405 | ||
406 | static void bmask(struct pt_regs *regs, unsigned int insn) | |
407 | { | |
408 | unsigned long rs1, rs2, rd_val, gsr; | |
409 | ||
410 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
411 | rs1 = fetch_reg(RS1(insn), regs); | |
412 | rs2 = fetch_reg(RS2(insn), regs); | |
413 | rd_val = rs1 + rs2; | |
414 | ||
415 | store_reg(regs, rd_val, RD(insn)); | |
416 | ||
417 | gsr = current_thread_info()->gsr[0] & 0xffffffff; | |
418 | gsr |= rd_val << 32UL; | |
419 | current_thread_info()->gsr[0] = gsr; | |
420 | } | |
421 | ||
422 | static void bshuffle(struct pt_regs *regs, unsigned int insn) | |
423 | { | |
424 | struct fpustate *f = FPUSTATE; | |
425 | unsigned long rs1, rs2, rd_val; | |
426 | unsigned long bmask, i; | |
427 | ||
428 | bmask = current_thread_info()->gsr[0] >> 32UL; | |
429 | ||
430 | rs1 = fpd_regval(f, RS1(insn)); | |
431 | rs2 = fpd_regval(f, RS2(insn)); | |
432 | ||
433 | rd_val = 0UL; | |
434 | for (i = 0; i < 8; i++) { | |
435 | unsigned long which = (bmask >> (i * 4)) & 0xf; | |
436 | unsigned long byte; | |
437 | ||
438 | if (which < 8) | |
439 | byte = (rs1 >> (which * 8)) & 0xff; | |
440 | else | |
441 | byte = (rs2 >> ((which-8)*8)) & 0xff; | |
442 | rd_val |= (byte << (i * 8)); | |
443 | } | |
444 | ||
445 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
446 | } | |
447 | ||
448 | static void pdist(struct pt_regs *regs, unsigned int insn) | |
449 | { | |
450 | struct fpustate *f = FPUSTATE; | |
451 | unsigned long rs1, rs2, *rd, rd_val; | |
452 | unsigned long i; | |
453 | ||
454 | rs1 = fpd_regval(f, RS1(insn)); | |
726c12f5 | 455 | rs2 = fpd_regval(f, RS2(insn)); |
0c51ed93 DM |
456 | rd = fpd_regaddr(f, RD(insn)); |
457 | ||
458 | rd_val = *rd; | |
459 | ||
460 | for (i = 0; i < 8; i++) { | |
461 | s16 s1, s2; | |
462 | ||
463 | s1 = (rs1 >> (56 - (i * 8))) & 0xff; | |
464 | s2 = (rs2 >> (56 - (i * 8))) & 0xff; | |
465 | ||
466 | /* Absolute value of difference. */ | |
467 | s1 -= s2; | |
468 | if (s1 < 0) | |
469 | s1 = ~s1 + 1; | |
470 | ||
471 | rd_val += s1; | |
472 | } | |
473 | ||
474 | *rd = rd_val; | |
475 | } | |
476 | ||
477 | static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
478 | { | |
479 | struct fpustate *f = FPUSTATE; | |
480 | unsigned long rs1, rs2, gsr, scale, rd_val; | |
481 | ||
482 | gsr = current_thread_info()->gsr[0]; | |
483 | scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); | |
484 | switch (opf) { | |
485 | case FPACK16_OPF: { | |
486 | unsigned long byte; | |
487 | ||
488 | rs2 = fpd_regval(f, RS2(insn)); | |
489 | rd_val = 0; | |
490 | for (byte = 0; byte < 4; byte++) { | |
491 | unsigned int val; | |
492 | s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; | |
493 | int scaled = src << scale; | |
494 | int from_fixed = scaled >> 7; | |
495 | ||
496 | val = ((from_fixed < 0) ? | |
497 | 0 : | |
498 | (from_fixed > 255) ? | |
499 | 255 : from_fixed); | |
500 | ||
501 | rd_val |= (val << (8 * byte)); | |
502 | } | |
503 | *fps_regaddr(f, RD(insn)) = rd_val; | |
504 | break; | |
505 | } | |
506 | ||
507 | case FPACK32_OPF: { | |
508 | unsigned long word; | |
509 | ||
510 | rs1 = fpd_regval(f, RS1(insn)); | |
511 | rs2 = fpd_regval(f, RS2(insn)); | |
512 | rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); | |
513 | for (word = 0; word < 2; word++) { | |
514 | unsigned long val; | |
515 | s32 src = (rs2 >> (word * 32UL)); | |
516 | s64 scaled = src << scale; | |
517 | s64 from_fixed = scaled >> 23; | |
518 | ||
519 | val = ((from_fixed < 0) ? | |
520 | 0 : | |
521 | (from_fixed > 255) ? | |
522 | 255 : from_fixed); | |
523 | ||
524 | rd_val |= (val << (32 * word)); | |
525 | } | |
526 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
527 | break; | |
528 | } | |
529 | ||
530 | case FPACKFIX_OPF: { | |
531 | unsigned long word; | |
532 | ||
533 | rs2 = fpd_regval(f, RS2(insn)); | |
534 | ||
535 | rd_val = 0; | |
536 | for (word = 0; word < 2; word++) { | |
537 | long val; | |
538 | s32 src = (rs2 >> (word * 32UL)); | |
539 | s64 scaled = src << scale; | |
540 | s64 from_fixed = scaled >> 16; | |
541 | ||
542 | val = ((from_fixed < -32768) ? | |
543 | -32768 : | |
544 | (from_fixed > 32767) ? | |
545 | 32767 : from_fixed); | |
546 | ||
547 | rd_val |= ((val & 0xffff) << (word * 16)); | |
548 | } | |
549 | *fps_regaddr(f, RD(insn)) = rd_val; | |
550 | break; | |
551 | } | |
552 | ||
553 | case FEXPAND_OPF: { | |
554 | unsigned long byte; | |
555 | ||
556 | rs2 = fps_regval(f, RS2(insn)); | |
557 | ||
558 | rd_val = 0; | |
559 | for (byte = 0; byte < 4; byte++) { | |
560 | unsigned long val; | |
561 | u8 src = (rs2 >> (byte * 8)) & 0xff; | |
562 | ||
563 | val = src << 4; | |
564 | ||
565 | rd_val |= (val << (byte * 16)); | |
566 | } | |
567 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
568 | break; | |
569 | } | |
570 | ||
571 | case FPMERGE_OPF: { | |
572 | rs1 = fps_regval(f, RS1(insn)); | |
573 | rs2 = fps_regval(f, RS2(insn)); | |
574 | ||
575 | rd_val = (((rs2 & 0x000000ff) << 0) | | |
576 | ((rs1 & 0x000000ff) << 8) | | |
577 | ((rs2 & 0x0000ff00) << 8) | | |
578 | ((rs1 & 0x0000ff00) << 16) | | |
579 | ((rs2 & 0x00ff0000) << 16) | | |
580 | ((rs1 & 0x00ff0000) << 24) | | |
581 | ((rs2 & 0xff000000) << 24) | | |
582 | ((rs1 & 0xff000000) << 32)); | |
583 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
584 | break; | |
585 | } | |
6cb79b3f | 586 | } |
0c51ed93 DM |
587 | } |
588 | ||
589 | static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
590 | { | |
591 | struct fpustate *f = FPUSTATE; | |
592 | unsigned long rs1, rs2, rd_val; | |
593 | ||
594 | switch (opf) { | |
595 | case FMUL8x16_OPF: { | |
596 | unsigned long byte; | |
597 | ||
598 | rs1 = fps_regval(f, RS1(insn)); | |
599 | rs2 = fpd_regval(f, RS2(insn)); | |
600 | ||
601 | rd_val = 0; | |
602 | for (byte = 0; byte < 4; byte++) { | |
603 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
604 | s16 src2 = (rs2 >> (byte * 16)) & 0xffff; | |
605 | u32 prod = src1 * src2; | |
606 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
607 | ||
608 | /* Round up. */ | |
609 | if (prod & 0x80) | |
610 | scaled++; | |
611 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
612 | } | |
613 | ||
614 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
615 | break; | |
616 | } | |
617 | ||
618 | case FMUL8x16AU_OPF: | |
619 | case FMUL8x16AL_OPF: { | |
620 | unsigned long byte; | |
621 | s16 src2; | |
622 | ||
623 | rs1 = fps_regval(f, RS1(insn)); | |
624 | rs2 = fps_regval(f, RS2(insn)); | |
625 | ||
626 | rd_val = 0; | |
88b938e6 | 627 | src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); |
0c51ed93 DM |
628 | for (byte = 0; byte < 4; byte++) { |
629 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
630 | u32 prod = src1 * src2; | |
631 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
632 | ||
633 | /* Round up. */ | |
634 | if (prod & 0x80) | |
635 | scaled++; | |
636 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
637 | } | |
638 | ||
639 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
640 | break; | |
641 | } | |
642 | ||
643 | case FMUL8SUx16_OPF: | |
644 | case FMUL8ULx16_OPF: { | |
645 | unsigned long byte, ushift; | |
646 | ||
647 | rs1 = fpd_regval(f, RS1(insn)); | |
648 | rs2 = fpd_regval(f, RS2(insn)); | |
649 | ||
650 | rd_val = 0; | |
651 | ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; | |
652 | for (byte = 0; byte < 4; byte++) { | |
653 | u16 src1; | |
654 | s16 src2; | |
655 | u32 prod; | |
656 | u16 scaled; | |
657 | ||
658 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
659 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
660 | prod = src1 * src2; | |
661 | scaled = ((prod & 0x00ffff00) >> 8); | |
662 | ||
663 | /* Round up. */ | |
664 | if (prod & 0x80) | |
665 | scaled++; | |
666 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
667 | } | |
668 | ||
669 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
670 | break; | |
671 | } | |
672 | ||
673 | case FMULD8SUx16_OPF: | |
674 | case FMULD8ULx16_OPF: { | |
675 | unsigned long byte, ushift; | |
676 | ||
677 | rs1 = fps_regval(f, RS1(insn)); | |
678 | rs2 = fps_regval(f, RS2(insn)); | |
679 | ||
680 | rd_val = 0; | |
681 | ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; | |
682 | for (byte = 0; byte < 2; byte++) { | |
683 | u16 src1; | |
684 | s16 src2; | |
685 | u32 prod; | |
686 | u16 scaled; | |
687 | ||
688 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
689 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
690 | prod = src1 * src2; | |
691 | scaled = ((prod & 0x00ffff00) >> 8); | |
692 | ||
693 | /* Round up. */ | |
694 | if (prod & 0x80) | |
695 | scaled++; | |
696 | rd_val |= ((scaled & 0xffffUL) << | |
697 | ((byte * 32UL) + 7UL)); | |
698 | } | |
699 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
700 | break; | |
701 | } | |
6cb79b3f | 702 | } |
0c51ed93 DM |
703 | } |
704 | ||
705 | static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
706 | { | |
707 | struct fpustate *f = FPUSTATE; | |
708 | unsigned long rs1, rs2, rd_val, i; | |
709 | ||
710 | rs1 = fpd_regval(f, RS1(insn)); | |
711 | rs2 = fpd_regval(f, RS2(insn)); | |
712 | ||
713 | rd_val = 0; | |
714 | ||
715 | switch (opf) { | |
716 | case FCMPGT16_OPF: | |
717 | for (i = 0; i < 4; i++) { | |
718 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
719 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
720 | ||
721 | if (a > b) | |
2e8ecdc0 | 722 | rd_val |= 8 >> i; |
0c51ed93 DM |
723 | } |
724 | break; | |
725 | ||
726 | case FCMPGT32_OPF: | |
727 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
728 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
729 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
730 | |
731 | if (a > b) | |
2e8ecdc0 | 732 | rd_val |= 2 >> i; |
0c51ed93 DM |
733 | } |
734 | break; | |
735 | ||
736 | case FCMPLE16_OPF: | |
737 | for (i = 0; i < 4; i++) { | |
738 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
739 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
740 | ||
741 | if (a <= b) | |
2e8ecdc0 | 742 | rd_val |= 8 >> i; |
0c51ed93 DM |
743 | } |
744 | break; | |
745 | ||
746 | case FCMPLE32_OPF: | |
747 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
748 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
749 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
750 | |
751 | if (a <= b) | |
2e8ecdc0 | 752 | rd_val |= 2 >> i; |
0c51ed93 DM |
753 | } |
754 | break; | |
755 | ||
756 | case FCMPNE16_OPF: | |
757 | for (i = 0; i < 4; i++) { | |
758 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
759 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
760 | ||
761 | if (a != b) | |
2e8ecdc0 | 762 | rd_val |= 8 >> i; |
0c51ed93 DM |
763 | } |
764 | break; | |
765 | ||
766 | case FCMPNE32_OPF: | |
767 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
768 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
769 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
770 | |
771 | if (a != b) | |
2e8ecdc0 | 772 | rd_val |= 2 >> i; |
0c51ed93 DM |
773 | } |
774 | break; | |
775 | ||
776 | case FCMPEQ16_OPF: | |
777 | for (i = 0; i < 4; i++) { | |
778 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
779 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
780 | ||
781 | if (a == b) | |
2e8ecdc0 | 782 | rd_val |= 8 >> i; |
0c51ed93 DM |
783 | } |
784 | break; | |
785 | ||
786 | case FCMPEQ32_OPF: | |
787 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
788 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
789 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
790 | |
791 | if (a == b) | |
2e8ecdc0 | 792 | rd_val |= 2 >> i; |
0c51ed93 DM |
793 | } |
794 | break; | |
6cb79b3f | 795 | } |
0c51ed93 DM |
796 | |
797 | maybe_flush_windows(0, 0, RD(insn), 0); | |
798 | store_reg(regs, rd_val, RD(insn)); | |
799 | } | |
800 | ||
801 | /* Emulate the VIS instructions which are not implemented in | |
802 | * hardware on Niagara. | |
803 | */ | |
804 | int vis_emul(struct pt_regs *regs, unsigned int insn) | |
805 | { | |
806 | unsigned long pc = regs->tpc; | |
807 | unsigned int opf; | |
808 | ||
809 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
810 | ||
a8b0ca17 | 811 | perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); |
121dd5f2 | 812 | |
0c51ed93 DM |
813 | if (test_thread_flag(TIF_32BIT)) |
814 | pc = (u32)pc; | |
815 | ||
816 | if (get_user(insn, (u32 __user *) pc)) | |
817 | return -EFAULT; | |
818 | ||
410d2c81 HP |
819 | save_and_clear_fpu(); |
820 | ||
0c51ed93 DM |
821 | opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; |
822 | switch (opf) { | |
823 | default: | |
824 | return -EINVAL; | |
825 | ||
826 | /* Pixel Formatting Instructions. */ | |
827 | case FPACK16_OPF: | |
828 | case FPACK32_OPF: | |
829 | case FPACKFIX_OPF: | |
830 | case FEXPAND_OPF: | |
831 | case FPMERGE_OPF: | |
832 | pformat(regs, insn, opf); | |
833 | break; | |
834 | ||
835 | /* Partitioned Multiply Instructions */ | |
836 | case FMUL8x16_OPF: | |
837 | case FMUL8x16AU_OPF: | |
838 | case FMUL8x16AL_OPF: | |
839 | case FMUL8SUx16_OPF: | |
840 | case FMUL8ULx16_OPF: | |
841 | case FMULD8SUx16_OPF: | |
842 | case FMULD8ULx16_OPF: | |
843 | pmul(regs, insn, opf); | |
844 | break; | |
845 | ||
846 | /* Pixel Compare Instructions */ | |
847 | case FCMPGT16_OPF: | |
848 | case FCMPGT32_OPF: | |
849 | case FCMPLE16_OPF: | |
850 | case FCMPLE32_OPF: | |
851 | case FCMPNE16_OPF: | |
852 | case FCMPNE32_OPF: | |
853 | case FCMPEQ16_OPF: | |
854 | case FCMPEQ32_OPF: | |
855 | pcmp(regs, insn, opf); | |
856 | break; | |
857 | ||
858 | /* Edge Handling Instructions */ | |
859 | case EDGE8_OPF: | |
860 | case EDGE8N_OPF: | |
861 | case EDGE8L_OPF: | |
862 | case EDGE8LN_OPF: | |
863 | case EDGE16_OPF: | |
864 | case EDGE16N_OPF: | |
865 | case EDGE16L_OPF: | |
866 | case EDGE16LN_OPF: | |
867 | case EDGE32_OPF: | |
868 | case EDGE32N_OPF: | |
869 | case EDGE32L_OPF: | |
870 | case EDGE32LN_OPF: | |
871 | edge(regs, insn, opf); | |
872 | break; | |
873 | ||
874 | /* Pixel Component Distance */ | |
875 | case PDIST_OPF: | |
876 | pdist(regs, insn); | |
877 | break; | |
878 | ||
879 | /* Three-Dimensional Array Addressing Instructions */ | |
880 | case ARRAY8_OPF: | |
881 | case ARRAY16_OPF: | |
882 | case ARRAY32_OPF: | |
883 | array(regs, insn, opf); | |
884 | break; | |
885 | ||
886 | /* Byte Mask and Shuffle Instructions */ | |
887 | case BMASK_OPF: | |
888 | bmask(regs, insn); | |
889 | break; | |
890 | ||
891 | case BSHUFFLE_OPF: | |
892 | bshuffle(regs, insn); | |
893 | break; | |
6cb79b3f | 894 | } |
0c51ed93 DM |
895 | |
896 | regs->tpc = regs->tnpc; | |
897 | regs->tnpc += 4; | |
898 | return 0; | |
899 | } |