/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "host-utils.h"
#include "helper.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

/* multiply high word */
uint64_t helper_mulhd(uint64_t arg1, uint64_t arg2)
{
    uint64_t tl, th;

    muls64(&tl, &th, arg1, arg2);
    return th;
}

/* multiply high word unsigned */
uint64_t helper_mulhdu(uint64_t arg1, uint64_t arg2)
{
    uint64_t tl, th;

    mulu64(&tl, &th, arg1, arg2);
    return th;
}

uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* The product overflows iff th is not the sign extension of tl */
    if (likely(th == ((int64_t)tl >> 63))) {
        env->xer &= ~(1 << XER_OV);
    } else {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
    }
    return (int64_t)tl;
}
#endif
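
/*
 * Worked example for the overflow rule in helper_mulldo: 3 * -2 gives
 * tl = -6 with th = -1 (the sign extension of tl), so no overflow;
 * INT64_MAX * 2 gives th = 0 while tl has its sign bit set, which cannot
 * be represented in 64 bits, so XER[OV] and XER[SO] are raised.
 */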

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->xer &= ~(1 << XER_CA);
            } else {
                env->xer |= (1 << XER_CA);
            }
        } else {
            ret = (int32_t)value;
            env->xer &= ~(1 << XER_CA);
        }
    } else {
        ret = (int32_t)value >> 31;
        if (ret) {
            env->xer |= (1 << XER_CA);
        } else {
            env->xer &= ~(1 << XER_CA);
        }
    }
    return (target_long)ret;
}
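
/*
 * Worked example for the carry rule above: sraw of 0xFFFFFFF5 (-11) by 1
 * gives -6 and shifts a 1 bit out, so XER[CA] is set; sraw of 0x0000000B
 * (11) by 1 gives 5 with CA clear, because CA is only set when the result
 * is negative and a nonzero bit was shifted out.
 */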

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            /* Note: the mask must be built with a 64-bit one, since
             * shift can reach 63 here. */
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->xer &= ~(1 << XER_CA);
            } else {
                env->xer |= (1 << XER_CA);
            }
        } else {
            ret = (int64_t)value;
            env->xer &= ~(1 << XER_CA);
        }
    } else {
        ret = (int64_t)value >> 63;
        if (ret) {
            env->xer |= (1 << XER_CA);
        } else {
            env->xer &= ~(1 << XER_CA);
        }
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif
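
/*
 * Worked example of the bit-slicing above, on the byte 0xB5 (10110101,
 * five 1 bits): after the first step each 2-bit field holds the count of
 * its pair (0x65), after the second each 4-bit field holds the count of
 * its nibble (0x32), and after the third each byte holds its own
 * population count (0x05), which is what popcntb returns per byte.
 */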

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->xer |= (1 << XER_OV) | (1 << XER_SO);
        } else {
            env->xer &= ~(1 << XER_OV);
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->xer &= ~(1 << XER_OV);
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
 * execute the following block. */
#define DO_HANDLE_NAN(result, x)                        \
    if (float32_is_any_nan(x)) {                        \
        CPU_FloatU __f;                                 \
        __f.f = x;                                      \
        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
        result = __f.f;                                 \
    } else

#define HANDLE_NAN1(result, x)                  \
    DO_HANDLE_NAN(result, x)
#define HANDLE_NAN2(result, x, y)               \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
#define HANDLE_NAN3(result, x, y, z)            \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)

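/*
 * Note that DO_HANDLE_NAN ends in a bare "else" on purpose: a use such as
 *     HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) { r->f[i] = ...; }
 * expands to an if/else chain whose trailing block only runs when none of
 * the inputs is a NaN.
 */
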
/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

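/*
 * Illustration: cvtsdsw(0x123456789LL, &sat) saturates to INT32_MAX and
 * sets *sat, while cvtsdsw(-5, &sat) returns -5 and leaves *sat untouched;
 * the callers below only ever set VSCR[SAT], never clear it, so the flag
 * is sticky.
 */
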
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}
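
/*
 * Illustration: lvsl with sh = 3 fills the result with bytes 0x03..0x12
 * and lvsr with sh = 3 with bytes 0x0d..0x1c; vperm then uses such a
 * vector as a permutation to realign an unaligned 16-byte load.
 */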

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}
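
/*
 * Note: ~a < b is exactly the carry-out of the 32-bit addition a + b,
 * since a + b > UINT32_MAX is equivalent to b > UINT32_MAX - a = ~a.
 */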

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
            }                                                           \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
#undef VARITHFP

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

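/*
 * Illustration of the rounding in VAVG_DO: vavgub of 5 and 6 computes
 * (5 + 6 + 1) >> 1 = 6, i.e. the average rounded up, in a widened type
 * so that the +1 cannot overflow the element.
 */
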
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint32_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
#undef VCMP_DO
#undef VCMP

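/*
 * On the CR6 encoding above: in the dot forms, bit 3 of the field means
 * "the predicate held for every element" and bit 1 means "it held for no
 * element" ("none" accumulates an OR, so none == 0 is the all-false
 * case); the remaining two bits stay zero.
 */
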
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here. */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                    ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end. */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_add(t, bf, &env->vec_status);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
#undef VMINMAX_DO
#undef VMINMAX

#define VMINMAXFP(suffix, rT, rF)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
                if (float32_lt_quiet(a->f[i], b->f[i],                  \
                                     &env->vec_status)) {               \
                    r->f[i] = rT->f[i];                                 \
                } else {                                                \
                    r->f[i] = rF->f[i];                                 \
                }                                                       \
            }                                                           \
        }                                                               \
    }
VMINMAXFP(minfp, a, b)
VMINMAXFP(maxfp, b, a)
#undef VMINMAXFP

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
                    b->mul_element[i * 2 + HI_IDX];                     \
            } else {                                                    \
                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
                    b->mul_element[i * 2 + LO_IDX];                     \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element)         \
    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
VMUL(sb, s8, s16)
VMUL(sh, s16, s32)
VMUL(ub, u8, u16)
VMUL(uh, u16, u32)
#undef VMUL_DO
#undef VMUL

void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end. */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_sub(t, bf, &env->vec_status);
            t = float64_chs(t);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

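/*
 * Illustration for vperm: a selector byte of 0x13 has bit 4 (0x10) set,
 * so it reads from b, and its low four bits select byte 3, i.e. byte 19
 * of the 32-byte concatenation a:b.
 */
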
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
        }
    }
}

#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *b)                              \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            HANDLE_NAN1(r->f[i], b->f[i]) {                             \
                r->f[i] = float32_round_to_int(b->f[i], &s);            \
            }                                                           \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element)                                        \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8)
VROTATE(h, u16)
VROTATE(w, u32)
#undef VROTATE
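
/*
 * Note: the mask expression in VROTATE (and in VSL/VSR below) evaluates
 * to 0x7, 0xf or 0x1f for 1-, 2- and 4-byte elements, because
 * 3 + sizeof/2 equals log2 of the element width in bits for those sizes;
 * each shift count is thus taken modulo the element width.
 */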

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            float32 t = float32_sqrt(b->f[i], &env->vec_status);

            r->f[i] = float32_div(float32_one, t, &env->vec_status);
        }
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            r->f[i] = float32_exp2(b->f[i], &env->vec_status);
        }
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            r->f[i] = float32_log2(b->f[i], &env->vec_status);
        }
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define LEFT 0
#define RIGHT 1
#else
#define LEFT 1
#define RIGHT 0
#endif
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they
 * are, to conform to what real hardware appears to do. */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, LEFT)
VSHIFT(r, RIGHT)
#undef VSHIFT
#undef LEFT
#undef RIGHT

#define VSL(suffix, element)                                            \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8)
VSL(h, u16)
VSL(w, u32)
#undef VSL

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}
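
/*
 * Note: vslo and vsro shift by whole octets only; the four-bit count
 * sits in bits 121:124 of vB, i.e. bits 6:3 of its last byte, hence the
 * (>> 3) & 0xf extraction above and in helper_vsro below.
 */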

/* Experimental testing shows that hardware masks the immediate. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

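/*
 * Note: (int8_t)(splat << 3) >> 3 sign-extends the 5-bit SIMM field of
 * vspltis*: e.g. splat = 0x1f becomes 0xf8 as an int8_t (-8), and
 * -8 >> 3 is -1.
 */
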
#define VSR(suffix, element)                                            \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8)
VSR(ah, s16)
VSR(aw, s32)
VSR(b, u8)
VSR(h, u16)
VSR(w, u32)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}
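
/*
 * Note: a->u32[i] >= b->u32[i] is exactly the carry-out of the two's
 * complement subtraction a - b (computed as a + ~b + 1), which is what
 * vsubcuw is defined to store.
 */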

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i+4];                          \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#undef DO_HANDLE_NAN
#undef HANDLE_NAN1
#undef HANDLE_NAN2
#undef HANDLE_NAN3
#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

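/*
 * Note: brinc is the SPE bit-reversed increment used for FFT addressing:
 * reversing the masked bits, adding one, and reversing back makes the
 * carry propagate from the most significant masked bit downwards.
 */
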
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}