]> git.proxmox.com Git - mirror_qemu.git/blob - target-ppc/int_helper.c
target-ppc: Altivec 2.07: Change Bit Masks to Support 64-bit Rotates and Shifts
[mirror_qemu.git] / target-ppc / int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "cpu.h"
20 #include "qemu/host-utils.h"
21 #include "helper.h"
22
23 #include "helper_regs.h"
24 /*****************************************************************************/
25 /* Fixed point operations helpers */
26 #if defined(TARGET_PPC64)
27
28 uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
29 {
30 int64_t th;
31 uint64_t tl;
32
33 muls64(&tl, (uint64_t *)&th, arg1, arg2);
34 /* If th != 0 && th != -1, then we had an overflow */
35 if (likely((uint64_t)(th + 1) <= 1)) {
36 env->ov = 0;
37 } else {
38 env->so = env->ov = 1;
39 }
40 return (int64_t)tl;
41 }
42 #endif
43
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
46 {
47 uint64_t rt = 0;
48 int overflow = 0;
49
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
52
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
58 }
59
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
62 }
63
64 if (oe) {
65 if (unlikely(overflow)) {
66 env->so = env->ov = 1;
67 } else {
68 env->ov = 0;
69 }
70 }
71
72 return (target_ulong)rt;
73 }
74
75 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
76 uint32_t oe)
77 {
78 int64_t rt = 0;
79 int overflow = 0;
80
81 int64_t dividend = (int64_t)ra << 32;
82 int64_t divisor = (int64_t)((int32_t)rb);
83
84 if (unlikely((divisor == 0) ||
85 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
86 overflow = 1;
87 } else {
88 rt = dividend / divisor;
89 overflow = rt != (int32_t)rt;
90 }
91
92 if (unlikely(overflow)) {
93 rt = 0; /* Undefined */
94 }
95
96 if (oe) {
97 if (unlikely(overflow)) {
98 env->so = env->ov = 1;
99 } else {
100 env->ov = 0;
101 }
102 }
103
104 return (target_ulong)rt;
105 }
106
107 #if defined(TARGET_PPC64)
108
109 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
110 {
111 uint64_t rt = 0;
112 int overflow = 0;
113
114 overflow = divu128(&rt, &ra, rb);
115
116 if (unlikely(overflow)) {
117 rt = 0; /* Undefined */
118 }
119
120 if (oe) {
121 if (unlikely(overflow)) {
122 env->so = env->ov = 1;
123 } else {
124 env->ov = 0;
125 }
126 }
127
128 return rt;
129 }
130
131 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
132 {
133 int64_t rt = 0;
134 int64_t ra = (int64_t)rau;
135 int64_t rb = (int64_t)rbu;
136 int overflow = divs128(&rt, &ra, rb);
137
138 if (unlikely(overflow)) {
139 rt = 0; /* Undefined */
140 }
141
142 if (oe) {
143
144 if (unlikely(overflow)) {
145 env->so = env->ov = 1;
146 } else {
147 env->ov = 0;
148 }
149 }
150
151 return rt;
152 }
153
154 #endif
155
156
157 target_ulong helper_cntlzw(target_ulong t)
158 {
159 return clz32(t);
160 }
161
162 #if defined(TARGET_PPC64)
163 target_ulong helper_cntlzd(target_ulong t)
164 {
165 return clz64(t);
166 }
167 #endif
168
169 #if defined(TARGET_PPC64)
170
/*
 * bpermd: bit permute doubleword.
 *
 * Each of the eight bytes of rs is a bit index.  For byte n (counted from
 * the least significant byte), if the index is below 64 and the bit of rb
 * at that index (index 0 being the most significant bit) is set, bit n of
 * the result is set.  Indices of 64 and above contribute a zero bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t result = 0;
    int n;

    for (n = 0; n < 8; n++) {
        const int bit = (rs >> (n * 8)) & 0xFF;

        if (bit >= 64) {
            continue;
        }
        if ((rb >> (63 - bit)) & 1) {
            result |= 1 << n;
        }
    }
    return result;
}
186
187 #endif
188
189 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
190 {
191 target_ulong mask = 0xff;
192 target_ulong ra = 0;
193 int i;
194
195 for (i = 0; i < sizeof(target_ulong); i++) {
196 if ((rs & mask) == (rb & mask)) {
197 ra |= mask;
198 }
199 mask <<= 8;
200 }
201 return ra;
202 }
203
204 /* shift right arithmetic helper */
205 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
206 target_ulong shift)
207 {
208 int32_t ret;
209
210 if (likely(!(shift & 0x20))) {
211 if (likely((uint32_t)shift != 0)) {
212 shift &= 0x1f;
213 ret = (int32_t)value >> shift;
214 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
215 env->ca = 0;
216 } else {
217 env->ca = 1;
218 }
219 } else {
220 ret = (int32_t)value;
221 env->ca = 0;
222 }
223 } else {
224 ret = (int32_t)value >> 31;
225 env->ca = (ret != 0);
226 }
227 return (target_long)ret;
228 }
229
230 #if defined(TARGET_PPC64)
231 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
233 {
234 int64_t ret;
235
236 if (likely(!(shift & 0x40))) {
237 if (likely((uint64_t)shift != 0)) {
238 shift &= 0x3f;
239 ret = (int64_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca = 0;
242 } else {
243 env->ca = 1;
244 }
245 } else {
246 ret = (int64_t)value;
247 env->ca = 0;
248 }
249 } else {
250 ret = (int64_t)value >> 63;
251 env->ca = (ret != 0);
252 }
253 return ret;
254 }
255 #endif
256
257 #if defined(TARGET_PPC64)
258 target_ulong helper_popcntb(target_ulong val)
259 {
260 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
261 0x5555555555555555ULL);
262 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
263 0x3333333333333333ULL);
264 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
265 0x0f0f0f0f0f0f0f0fULL);
266 return val;
267 }
268
269 target_ulong helper_popcntw(target_ulong val)
270 {
271 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
272 0x5555555555555555ULL);
273 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
274 0x3333333333333333ULL);
275 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
276 0x0f0f0f0f0f0f0f0fULL);
277 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
278 0x00ff00ff00ff00ffULL);
279 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
280 0x0000ffff0000ffffULL);
281 return val;
282 }
283
284 target_ulong helper_popcntd(target_ulong val)
285 {
286 return ctpop64(val);
287 }
288 #else
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
292 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
293 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
294 return val;
295 }
296
297 target_ulong helper_popcntw(target_ulong val)
298 {
299 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
300 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
301 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
302 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
303 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
304 return val;
305 }
306 #endif
307
308 /*****************************************************************************/
309 /* PowerPC 601 specific instructions (POWER bridge) */
310 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
311 {
312 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
313
314 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
315 (int32_t)arg2 == 0) {
316 env->spr[SPR_MQ] = 0;
317 return INT32_MIN;
318 } else {
319 env->spr[SPR_MQ] = tmp % arg2;
320 return tmp / (int32_t)arg2;
321 }
322 }
323
324 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
325 target_ulong arg2)
326 {
327 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328
329 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
330 (int32_t)arg2 == 0) {
331 env->so = env->ov = 1;
332 env->spr[SPR_MQ] = 0;
333 return INT32_MIN;
334 } else {
335 env->spr[SPR_MQ] = tmp % arg2;
336 tmp /= (int32_t)arg2;
337 if ((int32_t)tmp != tmp) {
338 env->so = env->ov = 1;
339 } else {
340 env->ov = 0;
341 }
342 return tmp;
343 }
344 }
345
346 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
347 target_ulong arg2)
348 {
349 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
350 (int32_t)arg2 == 0) {
351 env->spr[SPR_MQ] = 0;
352 return INT32_MIN;
353 } else {
354 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
355 return (int32_t)arg1 / (int32_t)arg2;
356 }
357 }
358
359 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
360 target_ulong arg2)
361 {
362 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
363 (int32_t)arg2 == 0) {
364 env->so = env->ov = 1;
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->ov = 0;
369 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
370 return (int32_t)arg1 / (int32_t)arg2;
371 }
372 }
373
374 /*****************************************************************************/
375 /* 602 specific instructions */
376 /* mfrom is the most crazy instruction ever seen, imho ! */
377 /* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *   return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
382 #if !defined(CONFIG_USER_ONLY)
383 target_ulong helper_602_mfrom(target_ulong arg)
384 {
385 if (likely(arg < 602)) {
386 #include "mfrom_table.c"
387 return mfrom_ROM_table[arg];
388 } else {
389 return 0;
390 }
391 }
392 #endif
393
394 /*****************************************************************************/
395 /* Altivec extension helpers */
/*
 * Host-endianness dependent indexing helpers.
 *
 * HI_IDX / LO_IDX: index of the high / low member of an adjacent pair.
 * VECTOR_FOR_INORDER_I: loop header that walks the elements of r->element
 * upwards on big-endian hosts and downwards on little-endian hosts.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define HI_IDX 0
#define LO_IDX 1
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define HI_IDX 1
#define LO_IDX 0
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
411
/*
 * Saturating conversion helpers.
 *
 * SATCVT generates cvt<from><to>(), which clamps x to [min, max] and
 * stores 1 in *sat whenever clamping happened (*sat is left untouched
 * otherwise).  SATCVTU is the variant for unsigned sources, where only
 * the upper bound needs checking.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
454
455 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
456 {
457 int i, j = (sh & 0xf);
458
459 VECTOR_FOR_INORDER_I(i, u8) {
460 r->u8[i] = j++;
461 }
462 }
463
464 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
465 {
466 int i, j = 0x10 - (sh & 0xf);
467
468 VECTOR_FOR_INORDER_I(i, u8) {
469 r->u8[i] = j++;
470 }
471 }
472
473 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
474 {
475 #if defined(HOST_WORDS_BIGENDIAN)
476 env->vscr = r->u32[3];
477 #else
478 env->vscr = r->u32[0];
479 #endif
480 set_flush_to_zero(vscr_nj, &env->vec_status);
481 }
482
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
484 {
485 int i;
486
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
489 }
490 }
491
492 #define VARITH_DO(name, op, element) \
493 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
494 { \
495 int i; \
496 \
497 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
498 r->element[i] = a->element[i] op b->element[i]; \
499 } \
500 }
501 #define VARITH(suffix, element) \
502 VARITH_DO(add##suffix, +, element) \
503 VARITH_DO(sub##suffix, -, element)
504 VARITH(ubm, u8)
505 VARITH(uhm, u16)
506 VARITH(uwm, u32)
507 VARITH(udm, u64)
508 VARITH_DO(muluwm, *, u32)
509 #undef VARITH_DO
510 #undef VARITH
511
512 #define VARITHFP(suffix, func) \
513 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
514 ppc_avr_t *b) \
515 { \
516 int i; \
517 \
518 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
519 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
520 } \
521 }
522 VARITHFP(addfp, float32_add)
523 VARITHFP(subfp, float32_sub)
524 VARITHFP(minfp, float32_min)
525 VARITHFP(maxfp, float32_max)
526 #undef VARITHFP
527
528 #define VARITHFPFMA(suffix, type) \
529 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
530 ppc_avr_t *b, ppc_avr_t *c) \
531 { \
532 int i; \
533 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
534 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
535 type, &env->vec_status); \
536 } \
537 }
538 VARITHFPFMA(maddfp, 0);
539 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
540 #undef VARITHFPFMA
541
542 #define VARITHSAT_CASE(type, op, cvt, element) \
543 { \
544 type result = (type)a->element[i] op (type)b->element[i]; \
545 r->element[i] = cvt(result, &sat); \
546 }
547
548 #define VARITHSAT_DO(name, op, optype, cvt, element) \
549 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
550 ppc_avr_t *b) \
551 { \
552 int sat = 0; \
553 int i; \
554 \
555 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
556 switch (sizeof(r->element[0])) { \
557 case 1: \
558 VARITHSAT_CASE(optype, op, cvt, element); \
559 break; \
560 case 2: \
561 VARITHSAT_CASE(optype, op, cvt, element); \
562 break; \
563 case 4: \
564 VARITHSAT_CASE(optype, op, cvt, element); \
565 break; \
566 } \
567 } \
568 if (sat) { \
569 env->vscr |= (1 << VSCR_SAT); \
570 } \
571 }
572 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
573 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
574 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
575 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
576 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
577 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
578 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
579 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
580 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
581 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
582 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
583 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
584 #undef VARITHSAT_CASE
585 #undef VARITHSAT_DO
586 #undef VARITHSAT_SIGNED
587 #undef VARITHSAT_UNSIGNED
588
589 #define VAVG_DO(name, element, etype) \
590 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
591 { \
592 int i; \
593 \
594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
595 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
596 r->element[i] = x >> 1; \
597 } \
598 }
599
600 #define VAVG(type, signed_element, signed_type, unsigned_element, \
601 unsigned_type) \
602 VAVG_DO(avgs##type, signed_element, signed_type) \
603 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
604 VAVG(b, s8, int16_t, u8, uint16_t)
605 VAVG(h, s16, int32_t, u16, uint32_t)
606 VAVG(w, s32, int64_t, u32, uint64_t)
607 #undef VAVG_DO
608 #undef VAVG
609
610 #define VCF(suffix, cvt, element) \
611 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
612 ppc_avr_t *b, uint32_t uim) \
613 { \
614 int i; \
615 \
616 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
617 float32 t = cvt(b->element[i], &env->vec_status); \
618 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
619 } \
620 }
621 VCF(ux, uint32_to_float32, u32)
622 VCF(sx, int32_to_float32, s32)
623 #undef VCF
624
625 #define VCMP_DO(suffix, compare, element, record) \
626 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
627 ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 uint32_t ones = (uint32_t)-1; \
630 uint32_t all = ones; \
631 uint32_t none = 0; \
632 int i; \
633 \
634 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
635 uint32_t result = (a->element[i] compare b->element[i] ? \
636 ones : 0x0); \
637 switch (sizeof(a->element[0])) { \
638 case 4: \
639 r->u32[i] = result; \
640 break; \
641 case 2: \
642 r->u16[i] = result; \
643 break; \
644 case 1: \
645 r->u8[i] = result; \
646 break; \
647 } \
648 all &= result; \
649 none |= result; \
650 } \
651 if (record) { \
652 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
653 } \
654 }
655 #define VCMP(suffix, compare, element) \
656 VCMP_DO(suffix, compare, element, 0) \
657 VCMP_DO(suffix##_dot, compare, element, 1)
658 VCMP(equb, ==, u8)
659 VCMP(equh, ==, u16)
660 VCMP(equw, ==, u32)
661 VCMP(gtub, >, u8)
662 VCMP(gtuh, >, u16)
663 VCMP(gtuw, >, u32)
664 VCMP(gtsb, >, s8)
665 VCMP(gtsh, >, s16)
666 VCMP(gtsw, >, s32)
667 #undef VCMP_DO
668 #undef VCMP
669
670 #define VCMPFP_DO(suffix, compare, order, record) \
671 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
672 ppc_avr_t *a, ppc_avr_t *b) \
673 { \
674 uint32_t ones = (uint32_t)-1; \
675 uint32_t all = ones; \
676 uint32_t none = 0; \
677 int i; \
678 \
679 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
680 uint32_t result; \
681 int rel = float32_compare_quiet(a->f[i], b->f[i], \
682 &env->vec_status); \
683 if (rel == float_relation_unordered) { \
684 result = 0; \
685 } else if (rel compare order) { \
686 result = ones; \
687 } else { \
688 result = 0; \
689 } \
690 r->u32[i] = result; \
691 all &= result; \
692 none |= result; \
693 } \
694 if (record) { \
695 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
696 } \
697 }
698 #define VCMPFP(suffix, compare, order) \
699 VCMPFP_DO(suffix, compare, order, 0) \
700 VCMPFP_DO(suffix##_dot, compare, order, 1)
701 VCMPFP(eqfp, ==, float_relation_equal)
702 VCMPFP(gefp, !=, float_relation_less)
703 VCMPFP(gtfp, ==, float_relation_greater)
704 #undef VCMPFP_DO
705 #undef VCMPFP
706
707 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
708 ppc_avr_t *a, ppc_avr_t *b, int record)
709 {
710 int i;
711 int all_in = 0;
712
713 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
714 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
715 if (le_rel == float_relation_unordered) {
716 r->u32[i] = 0xc0000000;
717 /* ALL_IN does not need to be updated here. */
718 } else {
719 float32 bneg = float32_chs(b->f[i]);
720 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
721 int le = le_rel != float_relation_greater;
722 int ge = ge_rel != float_relation_less;
723
724 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
725 all_in |= (!le | !ge);
726 }
727 }
728 if (record) {
729 env->crf[6] = (all_in == 0) << 1;
730 }
731 }
732
733 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
734 {
735 vcmpbfp_internal(env, r, a, b, 0);
736 }
737
738 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
739 ppc_avr_t *b)
740 {
741 vcmpbfp_internal(env, r, a, b, 1);
742 }
743
744 #define VCT(suffix, satcvt, element) \
745 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
746 ppc_avr_t *b, uint32_t uim) \
747 { \
748 int i; \
749 int sat = 0; \
750 float_status s = env->vec_status; \
751 \
752 set_float_rounding_mode(float_round_to_zero, &s); \
753 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
754 if (float32_is_any_nan(b->f[i])) { \
755 r->element[i] = 0; \
756 } else { \
757 float64 t = float32_to_float64(b->f[i], &s); \
758 int64_t j; \
759 \
760 t = float64_scalbn(t, uim, &s); \
761 j = float64_to_int64(t, &s); \
762 r->element[i] = satcvt(j, &sat); \
763 } \
764 } \
765 if (sat) { \
766 env->vscr |= (1 << VSCR_SAT); \
767 } \
768 }
769 VCT(uxs, cvtsduw, u32)
770 VCT(sxs, cvtsdsw, s32)
771 #undef VCT
772
773 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
774 ppc_avr_t *b, ppc_avr_t *c)
775 {
776 int sat = 0;
777 int i;
778
779 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
780 int32_t prod = a->s16[i] * b->s16[i];
781 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
782
783 r->s16[i] = cvtswsh(t, &sat);
784 }
785
786 if (sat) {
787 env->vscr |= (1 << VSCR_SAT);
788 }
789 }
790
791 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
792 ppc_avr_t *b, ppc_avr_t *c)
793 {
794 int sat = 0;
795 int i;
796
797 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
798 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
799 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
800 r->s16[i] = cvtswsh(t, &sat);
801 }
802
803 if (sat) {
804 env->vscr |= (1 << VSCR_SAT);
805 }
806 }
807
808 #define VMINMAX_DO(name, compare, element) \
809 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
810 { \
811 int i; \
812 \
813 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
814 if (a->element[i] compare b->element[i]) { \
815 r->element[i] = b->element[i]; \
816 } else { \
817 r->element[i] = a->element[i]; \
818 } \
819 } \
820 }
821 #define VMINMAX(suffix, element) \
822 VMINMAX_DO(min##suffix, >, element) \
823 VMINMAX_DO(max##suffix, <, element)
824 VMINMAX(sb, s8)
825 VMINMAX(sh, s16)
826 VMINMAX(sw, s32)
827 VMINMAX(sd, s64)
828 VMINMAX(ub, u8)
829 VMINMAX(uh, u16)
830 VMINMAX(uw, u32)
831 VMINMAX(ud, u64)
832 #undef VMINMAX_DO
833 #undef VMINMAX
834
835 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
836 {
837 int i;
838
839 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
840 int32_t prod = a->s16[i] * b->s16[i];
841 r->s16[i] = (int16_t) (prod + c->s16[i]);
842 }
843 }
844
845 #define VMRG_DO(name, element, highp) \
846 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
847 { \
848 ppc_avr_t result; \
849 int i; \
850 size_t n_elems = ARRAY_SIZE(r->element); \
851 \
852 for (i = 0; i < n_elems / 2; i++) { \
853 if (highp) { \
854 result.element[i*2+HI_IDX] = a->element[i]; \
855 result.element[i*2+LO_IDX] = b->element[i]; \
856 } else { \
857 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
858 b->element[n_elems - i - 1]; \
859 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
860 a->element[n_elems - i - 1]; \
861 } \
862 } \
863 *r = result; \
864 }
865 #if defined(HOST_WORDS_BIGENDIAN)
866 #define MRGHI 0
867 #define MRGLO 1
868 #else
869 #define MRGHI 1
870 #define MRGLO 0
871 #endif
872 #define VMRG(suffix, element) \
873 VMRG_DO(mrgl##suffix, element, MRGHI) \
874 VMRG_DO(mrgh##suffix, element, MRGLO)
875 VMRG(b, u8)
876 VMRG(h, u16)
877 VMRG(w, u32)
878 #undef VMRG_DO
879 #undef VMRG
880 #undef MRGHI
881 #undef MRGLO
882
883 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
884 ppc_avr_t *b, ppc_avr_t *c)
885 {
886 int32_t prod[16];
887 int i;
888
889 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
890 prod[i] = (int32_t)a->s8[i] * b->u8[i];
891 }
892
893 VECTOR_FOR_INORDER_I(i, s32) {
894 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
895 prod[4 * i + 2] + prod[4 * i + 3];
896 }
897 }
898
899 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
900 ppc_avr_t *b, ppc_avr_t *c)
901 {
902 int32_t prod[8];
903 int i;
904
905 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
906 prod[i] = a->s16[i] * b->s16[i];
907 }
908
909 VECTOR_FOR_INORDER_I(i, s32) {
910 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
911 }
912 }
913
914 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
915 ppc_avr_t *b, ppc_avr_t *c)
916 {
917 int32_t prod[8];
918 int i;
919 int sat = 0;
920
921 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
922 prod[i] = (int32_t)a->s16[i] * b->s16[i];
923 }
924
925 VECTOR_FOR_INORDER_I(i, s32) {
926 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
927
928 r->u32[i] = cvtsdsw(t, &sat);
929 }
930
931 if (sat) {
932 env->vscr |= (1 << VSCR_SAT);
933 }
934 }
935
936 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
937 ppc_avr_t *b, ppc_avr_t *c)
938 {
939 uint16_t prod[16];
940 int i;
941
942 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
943 prod[i] = a->u8[i] * b->u8[i];
944 }
945
946 VECTOR_FOR_INORDER_I(i, u32) {
947 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
948 prod[4 * i + 2] + prod[4 * i + 3];
949 }
950 }
951
952 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
953 ppc_avr_t *b, ppc_avr_t *c)
954 {
955 uint32_t prod[8];
956 int i;
957
958 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
959 prod[i] = a->u16[i] * b->u16[i];
960 }
961
962 VECTOR_FOR_INORDER_I(i, u32) {
963 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
964 }
965 }
966
967 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
968 ppc_avr_t *b, ppc_avr_t *c)
969 {
970 uint32_t prod[8];
971 int i;
972 int sat = 0;
973
974 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
975 prod[i] = a->u16[i] * b->u16[i];
976 }
977
978 VECTOR_FOR_INORDER_I(i, s32) {
979 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
980
981 r->u32[i] = cvtuduw(t, &sat);
982 }
983
984 if (sat) {
985 env->vscr |= (1 << VSCR_SAT);
986 }
987 }
988
989 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
990 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
991 { \
992 int i; \
993 \
994 VECTOR_FOR_INORDER_I(i, prod_element) { \
995 if (evenp) { \
996 r->prod_element[i] = \
997 (cast)a->mul_element[i * 2 + HI_IDX] * \
998 (cast)b->mul_element[i * 2 + HI_IDX]; \
999 } else { \
1000 r->prod_element[i] = \
1001 (cast)a->mul_element[i * 2 + LO_IDX] * \
1002 (cast)b->mul_element[i * 2 + LO_IDX]; \
1003 } \
1004 } \
1005 }
1006 #define VMUL(suffix, mul_element, prod_element, cast) \
1007 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1008 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1009 VMUL(sb, s8, s16, int16_t)
1010 VMUL(sh, s16, s32, int32_t)
1011 VMUL(sw, s32, s64, int64_t)
1012 VMUL(ub, u8, u16, uint16_t)
1013 VMUL(uh, u16, u32, uint32_t)
1014 VMUL(uw, u32, u64, uint64_t)
1015 #undef VMUL_DO
1016 #undef VMUL
1017
1018 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1019 ppc_avr_t *c)
1020 {
1021 ppc_avr_t result;
1022 int i;
1023
1024 VECTOR_FOR_INORDER_I(i, u8) {
1025 int s = c->u8[i] & 0x1f;
1026 #if defined(HOST_WORDS_BIGENDIAN)
1027 int index = s & 0xf;
1028 #else
1029 int index = 15 - (s & 0xf);
1030 #endif
1031
1032 if (s & 0x10) {
1033 result.u8[i] = b->u8[index];
1034 } else {
1035 result.u8[i] = a->u8[index];
1036 }
1037 }
1038 *r = result;
1039 }
1040
/* PKBIG: 1 on big-endian hosts, 0 otherwise. */
#ifdef HOST_WORDS_BIGENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
1046 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1047 {
1048 int i, j;
1049 ppc_avr_t result;
1050 #if defined(HOST_WORDS_BIGENDIAN)
1051 const ppc_avr_t *x[2] = { a, b };
1052 #else
1053 const ppc_avr_t *x[2] = { b, a };
1054 #endif
1055
1056 VECTOR_FOR_INORDER_I(i, u64) {
1057 VECTOR_FOR_INORDER_I(j, u32) {
1058 uint32_t e = x[i]->u32[j];
1059
1060 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1061 ((e >> 6) & 0x3e0) |
1062 ((e >> 3) & 0x1f));
1063 }
1064 }
1065 *r = result;
1066 }
1067
1068 #define VPK(suffix, from, to, cvt, dosat) \
1069 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1070 ppc_avr_t *a, ppc_avr_t *b) \
1071 { \
1072 int i; \
1073 int sat = 0; \
1074 ppc_avr_t result; \
1075 ppc_avr_t *a0 = PKBIG ? a : b; \
1076 ppc_avr_t *a1 = PKBIG ? b : a; \
1077 \
1078 VECTOR_FOR_INORDER_I(i, from) { \
1079 result.to[i] = cvt(a0->from[i], &sat); \
1080 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1081 } \
1082 *r = result; \
1083 if (dosat && sat) { \
1084 env->vscr |= (1 << VSCR_SAT); \
1085 } \
1086 }
1087 #define I(x, y) (x)
1088 VPK(shss, s16, s8, cvtshsb, 1)
1089 VPK(shus, s16, u8, cvtshub, 1)
1090 VPK(swss, s32, s16, cvtswsh, 1)
1091 VPK(swus, s32, u16, cvtswuh, 1)
1092 VPK(sdss, s64, s32, cvtsdsw, 1)
1093 VPK(sdus, s64, u32, cvtsduw, 1)
1094 VPK(uhus, u16, u8, cvtuhub, 1)
1095 VPK(uwus, u32, u16, cvtuwuh, 1)
1096 VPK(udus, u64, u32, cvtuduw, 1)
1097 VPK(uhum, u16, u8, I, 0)
1098 VPK(uwum, u32, u16, I, 0)
1099 VPK(udum, u64, u32, I, 0)
1100 #undef I
1101 #undef VPK
1102 #undef PKBIG
1103
1104 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1105 {
1106 int i;
1107
1108 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1109 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1110 }
1111 }
1112
1113 #define VRFI(suffix, rounding) \
1114 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1115 ppc_avr_t *b) \
1116 { \
1117 int i; \
1118 float_status s = env->vec_status; \
1119 \
1120 set_float_rounding_mode(rounding, &s); \
1121 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1122 r->f[i] = float32_round_to_int (b->f[i], &s); \
1123 } \
1124 }
1125 VRFI(n, float_round_nearest_even)
1126 VRFI(m, float_round_down)
1127 VRFI(p, float_round_up)
1128 VRFI(z, float_round_to_zero)
1129 #undef VRFI
1130
1131 #define VROTATE(suffix, element, mask) \
1132 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1133 { \
1134 int i; \
1135 \
1136 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1137 unsigned int shift = b->element[i] & mask; \
1138 r->element[i] = (a->element[i] << shift) | \
1139 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1140 } \
1141 }
1142 VROTATE(b, u8, 0x7)
1143 VROTATE(h, u16, 0xF)
1144 VROTATE(w, u32, 0x1F)
1145 #undef VROTATE
1146
1147 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1148 {
1149 int i;
1150
1151 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1152 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1153
1154 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1155 }
1156 }
1157
1158 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1159 ppc_avr_t *c)
1160 {
1161 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1162 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1163 }
1164
1165 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1166 {
1167 int i;
1168
1169 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1170 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1171 }
1172 }
1173
1174 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1175 {
1176 int i;
1177
1178 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1179 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1180 }
1181 }
1182
1183 #if defined(HOST_WORDS_BIGENDIAN)
1184 #define LEFT 0
1185 #define RIGHT 1
1186 #else
1187 #define LEFT 1
1188 #define RIGHT 0
1189 #endif
1190 /* The specification says that the results are undefined if all of the
1191 * shift counts are not identical. We check to make sure that they are
1192 * to conform to what real hardware appears to do. */
1193 #define VSHIFT(suffix, leftp) \
1194 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1195 { \
1196 int shift = b->u8[LO_IDX*15] & 0x7; \
1197 int doit = 1; \
1198 int i; \
1199 \
1200 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1201 doit = doit && ((b->u8[i] & 0x7) == shift); \
1202 } \
1203 if (doit) { \
1204 if (shift == 0) { \
1205 *r = *a; \
1206 } else if (leftp) { \
1207 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1208 \
1209 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1210 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1211 } else { \
1212 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1213 \
1214 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1215 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1216 } \
1217 } \
1218 }
1219 VSHIFT(l, LEFT)
1220 VSHIFT(r, RIGHT)
1221 #undef VSHIFT
1222 #undef LEFT
1223 #undef RIGHT
1224
1225 #define VSL(suffix, element, mask) \
1226 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1227 { \
1228 int i; \
1229 \
1230 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1231 unsigned int shift = b->element[i] & mask; \
1232 \
1233 r->element[i] = a->element[i] << shift; \
1234 } \
1235 }
1236 VSL(b, u8, 0x7)
1237 VSL(h, u16, 0x0F)
1238 VSL(w, u32, 0x1F)
1239 #undef VSL
1240
1241 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1242 {
1243 int sh = shift & 0xf;
1244 int i;
1245 ppc_avr_t result;
1246
1247 #if defined(HOST_WORDS_BIGENDIAN)
1248 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1249 int index = sh + i;
1250 if (index > 0xf) {
1251 result.u8[i] = b->u8[index - 0x10];
1252 } else {
1253 result.u8[i] = a->u8[index];
1254 }
1255 }
1256 #else
1257 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1258 int index = (16 - sh) + i;
1259 if (index > 0xf) {
1260 result.u8[i] = a->u8[index - 0x10];
1261 } else {
1262 result.u8[i] = b->u8[index];
1263 }
1264 }
1265 #endif
1266 *r = result;
1267 }
1268
1269 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1270 {
1271 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1272
1273 #if defined(HOST_WORDS_BIGENDIAN)
1274 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1275 memset(&r->u8[16-sh], 0, sh);
1276 #else
1277 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1278 memset(&r->u8[0], 0, sh);
1279 #endif
1280 }
1281
1282 /* Experimental testing shows that hardware masks the immediate. */
1283 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1284 #if defined(HOST_WORDS_BIGENDIAN)
1285 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1286 #else
1287 #define SPLAT_ELEMENT(element) \
1288 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1289 #endif
1290 #define VSPLT(suffix, element) \
1291 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1292 { \
1293 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1294 int i; \
1295 \
1296 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1297 r->element[i] = s; \
1298 } \
1299 }
1300 VSPLT(b, u8)
1301 VSPLT(h, u16)
1302 VSPLT(w, u32)
1303 #undef VSPLT
1304 #undef SPLAT_ELEMENT
1305 #undef _SPLAT_MASKED
1306
1307 #define VSPLTI(suffix, element, splat_type) \
1308 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1309 { \
1310 splat_type x = (int8_t)(splat << 3) >> 3; \
1311 int i; \
1312 \
1313 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1314 r->element[i] = x; \
1315 } \
1316 }
1317 VSPLTI(b, s8, int8_t)
1318 VSPLTI(h, s16, int16_t)
1319 VSPLTI(w, s32, int32_t)
1320 #undef VSPLTI
1321
1322 #define VSR(suffix, element, mask) \
1323 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1324 { \
1325 int i; \
1326 \
1327 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1328 unsigned int shift = b->element[i] & mask; \
1329 r->element[i] = a->element[i] >> shift; \
1330 } \
1331 }
1332 VSR(ab, s8, 0x7)
1333 VSR(ah, s16, 0xF)
1334 VSR(aw, s32, 0x1F)
1335 VSR(b, u8, 0x7)
1336 VSR(h, u16, 0xF)
1337 VSR(w, u32, 0x1F)
1338 #undef VSR
1339
1340 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1341 {
1342 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1343
1344 #if defined(HOST_WORDS_BIGENDIAN)
1345 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1346 memset(&r->u8[0], 0, sh);
1347 #else
1348 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1349 memset(&r->u8[16 - sh], 0, sh);
1350 #endif
1351 }
1352
1353 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1354 {
1355 int i;
1356
1357 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1358 r->u32[i] = a->u32[i] >= b->u32[i];
1359 }
1360 }
1361
1362 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1363 {
1364 int64_t t;
1365 int i, upper;
1366 ppc_avr_t result;
1367 int sat = 0;
1368
1369 #if defined(HOST_WORDS_BIGENDIAN)
1370 upper = ARRAY_SIZE(r->s32)-1;
1371 #else
1372 upper = 0;
1373 #endif
1374 t = (int64_t)b->s32[upper];
1375 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1376 t += a->s32[i];
1377 result.s32[i] = 0;
1378 }
1379 result.s32[upper] = cvtsdsw(t, &sat);
1380 *r = result;
1381
1382 if (sat) {
1383 env->vscr |= (1 << VSCR_SAT);
1384 }
1385 }
1386
1387 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1388 {
1389 int i, j, upper;
1390 ppc_avr_t result;
1391 int sat = 0;
1392
1393 #if defined(HOST_WORDS_BIGENDIAN)
1394 upper = 1;
1395 #else
1396 upper = 0;
1397 #endif
1398 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1399 int64_t t = (int64_t)b->s32[upper + i * 2];
1400
1401 result.u64[i] = 0;
1402 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1403 t += a->s32[2 * i + j];
1404 }
1405 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1406 }
1407
1408 *r = result;
1409 if (sat) {
1410 env->vscr |= (1 << VSCR_SAT);
1411 }
1412 }
1413
1414 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1415 {
1416 int i, j;
1417 int sat = 0;
1418
1419 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1420 int64_t t = (int64_t)b->s32[i];
1421
1422 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1423 t += a->s8[4 * i + j];
1424 }
1425 r->s32[i] = cvtsdsw(t, &sat);
1426 }
1427
1428 if (sat) {
1429 env->vscr |= (1 << VSCR_SAT);
1430 }
1431 }
1432
1433 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1434 {
1435 int sat = 0;
1436 int i;
1437
1438 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1439 int64_t t = (int64_t)b->s32[i];
1440
1441 t += a->s16[2 * i] + a->s16[2 * i + 1];
1442 r->s32[i] = cvtsdsw(t, &sat);
1443 }
1444
1445 if (sat) {
1446 env->vscr |= (1 << VSCR_SAT);
1447 }
1448 }
1449
1450 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1451 {
1452 int i, j;
1453 int sat = 0;
1454
1455 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1456 uint64_t t = (uint64_t)b->u32[i];
1457
1458 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1459 t += a->u8[4 * i + j];
1460 }
1461 r->u32[i] = cvtuduw(t, &sat);
1462 }
1463
1464 if (sat) {
1465 env->vscr |= (1 << VSCR_SAT);
1466 }
1467 }
1468
1469 #if defined(HOST_WORDS_BIGENDIAN)
1470 #define UPKHI 1
1471 #define UPKLO 0
1472 #else
1473 #define UPKHI 0
1474 #define UPKLO 1
1475 #endif
1476 #define VUPKPX(suffix, hi) \
1477 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1478 { \
1479 int i; \
1480 ppc_avr_t result; \
1481 \
1482 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1483 uint16_t e = b->u16[hi ? i : i+4]; \
1484 uint8_t a = (e >> 15) ? 0xff : 0; \
1485 uint8_t r = (e >> 10) & 0x1f; \
1486 uint8_t g = (e >> 5) & 0x1f; \
1487 uint8_t b = e & 0x1f; \
1488 \
1489 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1490 } \
1491 *r = result; \
1492 }
1493 VUPKPX(lpx, UPKLO)
1494 VUPKPX(hpx, UPKHI)
1495 #undef VUPKPX
1496
1497 #define VUPK(suffix, unpacked, packee, hi) \
1498 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1499 { \
1500 int i; \
1501 ppc_avr_t result; \
1502 \
1503 if (hi) { \
1504 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1505 result.unpacked[i] = b->packee[i]; \
1506 } \
1507 } else { \
1508 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1509 i++) { \
1510 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1511 } \
1512 } \
1513 *r = result; \
1514 }
1515 VUPK(hsb, s16, s8, UPKHI)
1516 VUPK(hsh, s32, s16, UPKHI)
1517 VUPK(hsw, s64, s32, UPKHI)
1518 VUPK(lsb, s16, s8, UPKLO)
1519 VUPK(lsh, s32, s16, UPKLO)
1520 VUPK(lsw, s64, s32, UPKLO)
1521 #undef VUPK
1522 #undef UPKHI
1523 #undef UPKLO
1524
1525 #define VGENERIC_DO(name, element) \
1526 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1527 { \
1528 int i; \
1529 \
1530 VECTOR_FOR_INORDER_I(i, element) { \
1531 r->element[i] = name(b->element[i]); \
1532 } \
1533 }
1534
1535 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1536 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1537 #define clzw(v) clz32((v))
1538 #define clzd(v) clz64((v))
1539
1540 VGENERIC_DO(clzb, u8)
1541 VGENERIC_DO(clzh, u16)
1542 VGENERIC_DO(clzw, u32)
1543 VGENERIC_DO(clzd, u64)
1544
1545 #undef clzb
1546 #undef clzh
1547 #undef clzw
1548 #undef clzd
1549
1550 #define popcntb(v) ctpop8(v)
1551 #define popcnth(v) ctpop16(v)
1552 #define popcntw(v) ctpop32(v)
1553 #define popcntd(v) ctpop64(v)
1554
1555 VGENERIC_DO(popcntb, u8)
1556 VGENERIC_DO(popcnth, u16)
1557 VGENERIC_DO(popcntw, u32)
1558 VGENERIC_DO(popcntd, u64)
1559
1560 #undef popcntb
1561 #undef popcnth
1562 #undef popcntw
1563 #undef popcntd
1564
1565 #undef VGENERIC_DO
1566
1567
1568 #undef VECTOR_FOR_INORDER_I
1569 #undef HI_IDX
1570 #undef LO_IDX
1571
1572 /*****************************************************************************/
1573 /* SPE extension helpers */
/* Use a table to make this quicker: hbrev[n] is the 4-bit value n with its
 * bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of an 8-bit value: the reversed low nibble becomes
 * the high nibble and vice versa. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/* Reverse the bit order of a 32-bit value by bit-reversing each byte and
 * swapping the byte positions.  Each reversed byte is widened to uint32_t
 * before shifting, so that "<< 24" never shifts a promoted int into its
 * sign bit. */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
1590
1591 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
1592 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
1593 {
1594 uint32_t a, b, d, mask;
1595
1596 mask = UINT32_MAX >> (32 - MASKBITS);
1597 a = arg1 & mask;
1598 b = arg2 & mask;
1599 d = word_reverse(1 + word_reverse(a | ~b));
1600 return (arg1 & ~mask) | (d & b);
1601 }
1602
/* Count leading sign bits: the number of leading bits equal to bit 31,
 * computed as clz32() of the value, complemented first when bit 31 is
 * set. */
uint32_t helper_cntlsw32(uint32_t val)
{
    uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
1611
/* Count leading zero bits of a 32-bit value; thin wrapper around clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
1616
1617 /* 440 specific */
1618 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
1619 target_ulong low, uint32_t update_Rc)
1620 {
1621 target_ulong mask;
1622 int i;
1623
1624 i = 1;
1625 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
1626 if ((high & mask) == 0) {
1627 if (update_Rc) {
1628 env->crf[0] = 0x4;
1629 }
1630 goto done;
1631 }
1632 i++;
1633 }
1634 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
1635 if ((low & mask) == 0) {
1636 if (update_Rc) {
1637 env->crf[0] = 0x8;
1638 }
1639 goto done;
1640 }
1641 i++;
1642 }
1643 if (update_Rc) {
1644 env->crf[0] = 0x2;
1645 }
1646 done:
1647 env->xer = (env->xer & ~0x7F) | i;
1648 if (update_Rc) {
1649 env->crf[0] |= xer_so;
1650 }
1651 return i;
1652 }