1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "cpu.h"
20 #include "qemu/host-utils.h"
21 #include "helper.h"
22
23 #include "helper_regs.h"
24 /*****************************************************************************/
25 /* Fixed point operations helpers */
26 #if defined(TARGET_PPC64)
27
28 uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
29 {
30 int64_t th;
31 uint64_t tl;
32
33 muls64(&tl, (uint64_t *)&th, arg1, arg2);
34 /* Overflow iff th is not the sign extension of tl's bit 63 */
35 if (likely(th == ((int64_t)tl >> 63))) {
36 env->ov = 0;
37 } else {
38 env->so = env->ov = 1;
39 }
40 return (int64_t)tl;
41 }
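/* Editor's note - illustrative example, not part of the original file:
 * mulldo(0x4000000000000000, 2) forms the 128-bit product 2^63, i.e.
 * th = 0 and tl = 0x8000000000000000.  Since (int64_t)tl >> 63 == -1
 * != th, the product does not fit in a signed doubleword: OV/SO are
 * set and the truncated (now negative) low 64 bits are returned.
 */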
42 #endif
43
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
46 {
47 uint64_t rt = 0;
48 int overflow = 0;
49
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
52
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
58 }
59
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
62 }
63
64 if (oe) {
65 if (unlikely(overflow)) {
66 env->so = env->ov = 1;
67 } else {
68 env->ov = 0;
69 }
70 }
71
72 return (target_ulong)rt;
73 }
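/* Illustrative example (editor's addition): divweu computes
 * (RA || 0x00000000) / RB, i.e. a 64-bit dividend built from ra.
 * With ra = 1, rb = 3: dividend = 0x100000000, rt = 0x55555555 and
 * there is no overflow.  With ra = 3, rb = 3 the quotient is
 * 0x100000000, which exceeds UINT32_MAX, so rt is zeroed and, when
 * oe is set, OV/SO are raised.
 */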
74
75 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
76 uint32_t oe)
77 {
78 int64_t rt = 0;
79 int overflow = 0;
80
81 int64_t dividend = (int64_t)ra << 32;
82 int64_t divisor = (int64_t)((int32_t)rb);
83
84 if (unlikely((divisor == 0) ||
85 ((divisor == -1) && (dividend == INT64_MIN)))) {
86 overflow = 1;
87 } else {
88 rt = dividend / divisor;
89 overflow = rt != (int32_t)rt;
90 }
91
92 if (unlikely(overflow)) {
93 rt = 0; /* Undefined */
94 }
95
96 if (oe) {
97 if (unlikely(overflow)) {
98 env->so = env->ov = 1;
99 } else {
100 env->ov = 0;
101 }
102 }
103
104 return (target_ulong)rt;
105 }
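/* Illustrative example (editor's addition): for the signed divwe,
 * ra = 1, rb = 2 gives dividend = 2^32 and quotient 0x80000000.
 * That fails the rt == (int32_t)rt check (it is not representable as
 * a positive int32_t), so it overflows even though the same operands
 * are fine for the unsigned divweu above.
 */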
106
107 #if defined(TARGET_PPC64)
108
109 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
110 {
111 uint64_t rt = 0;
112 int overflow = 0;
113
114 overflow = divu128(&rt, &ra, rb);
115
116 if (unlikely(overflow)) {
117 rt = 0; /* Undefined */
118 }
119
120 if (oe) {
121 if (unlikely(overflow)) {
122 env->so = env->ov = 1;
123 } else {
124 env->ov = 0;
125 }
126 }
127
128 return rt;
129 }
130
131 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
132 {
133 int64_t rt = 0;
134 int64_t ra = (int64_t)rau;
135 int64_t rb = (int64_t)rbu;
136 int overflow = divs128(&rt, &ra, rb);
137
138 if (unlikely(overflow)) {
139 rt = 0; /* Undefined */
140 }
141
142 if (oe) {
144 if (unlikely(overflow)) {
145 env->so = env->ov = 1;
146 } else {
147 env->ov = 0;
148 }
149 }
150
151 return rt;
152 }
153
154 #endif
155
157 target_ulong helper_cntlzw(target_ulong t)
158 {
159 return clz32(t);
160 }
161
162 #if defined(TARGET_PPC64)
163 target_ulong helper_cntlzd(target_ulong t)
164 {
165 return clz64(t);
166 }
167 #endif
168
169 #if defined(TARGET_PPC64)
170
171 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
172 {
173 int i;
174 uint64_t ra = 0;
175
176 for (i = 0; i < 8; i++) {
177 int index = (rs >> (i*8)) & 0xFF;
178 if (index < 64) {
179 if (rb & (1ull << (63-index))) {
180 ra |= 1 << i;
181 }
182 }
183 }
184 return ra;
185 }
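/* Illustrative example (editor's addition): bpermd gathers eight
 * arbitrary bits of rb, selected by the byte-sized indices in rs,
 * into the low byte of the result.  E.g. if byte 0 of rs is 0 and
 * rb's architected bit 0 (1ull << 63) is set, bit 0 of ra is set;
 * any index >= 64 contributes a 0 bit.
 */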
186
187 #endif
188
189 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
190 {
191 target_ulong mask = 0xff;
192 target_ulong ra = 0;
193 int i;
194
195 for (i = 0; i < sizeof(target_ulong); i++) {
196 if ((rs & mask) == (rb & mask)) {
197 ra |= mask;
198 }
199 mask <<= 8;
200 }
201 return ra;
202 }
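/* Illustrative example (editor's addition): cmpb builds a per-byte
 * equality mask.  On a 32-bit target, rs = 0x11223344 compared with
 * rb = 0x11FF3344 yields ra = 0xFF00FFFF - 0xFF for each byte that
 * matches, 0x00 for each that does not.
 */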
203
204 /* shift right arithmetic helper */
205 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
206 target_ulong shift)
207 {
208 int32_t ret;
209
210 if (likely(!(shift & 0x20))) {
211 if (likely((uint32_t)shift != 0)) {
212 shift &= 0x1f;
213 ret = (int32_t)value >> shift;
214 if (likely(ret >= 0 || (value & ((1U << shift) - 1)) == 0)) {
215 env->ca = 0;
216 } else {
217 env->ca = 1;
218 }
219 } else {
220 ret = (int32_t)value;
221 env->ca = 0;
222 }
223 } else {
224 ret = (int32_t)value >> 31;
225 env->ca = (ret != 0);
226 }
227 return (target_long)ret;
228 }
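/* Illustrative example (editor's addition): CA records whether a
 * negative value lost any 1-bits to the shift, so that "sraw; addze"
 * implements signed division by a power of two rounded toward zero:
 *   sraw(-3, 1): ret = -2, shifted-out bit = 1 -> CA = 1, -2 + 1 = -1
 *   sraw( 3, 1): ret =  1, result non-negative -> CA = 0
 */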
229
230 #if defined(TARGET_PPC64)
231 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
233 {
234 int64_t ret;
235
236 if (likely(!(shift & 0x40))) {
237 if (likely((uint64_t)shift != 0)) {
238 shift &= 0x3f;
239 ret = (int64_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
241 env->ca = 0;
242 } else {
243 env->ca = 1;
244 }
245 } else {
246 ret = (int64_t)value;
247 env->ca = 0;
248 }
249 } else {
250 ret = (int64_t)value >> 63;
251 env->ca = (ret != 0);
252 }
253 return ret;
254 }
255 #endif
256
257 #if defined(TARGET_PPC64)
258 target_ulong helper_popcntb(target_ulong val)
259 {
260 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
261 0x5555555555555555ULL);
262 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
263 0x3333333333333333ULL);
264 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
265 0x0f0f0f0f0f0f0f0fULL);
266 return val;
267 }
268
269 target_ulong helper_popcntw(target_ulong val)
270 {
271 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
272 0x5555555555555555ULL);
273 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
274 0x3333333333333333ULL);
275 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
276 0x0f0f0f0f0f0f0f0fULL);
277 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
278 0x00ff00ff00ff00ffULL);
279 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
280 0x0000ffff0000ffffULL);
281 return val;
282 }
283
284 target_ulong helper_popcntd(target_ulong val)
285 {
286 return ctpop64(val);
287 }
288 #else
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
292 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
293 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
294 return val;
295 }
296
297 target_ulong helper_popcntw(target_ulong val)
298 {
299 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
300 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
301 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
302 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
303 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
304 return val;
305 }
306 #endif
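/* Editor's note: the open-coded popcnt helpers above use the classic
 * SWAR ("SIMD within a register") reduction - each step adds adjacent
 * 1-, 2-, then 4-bit partial counts in parallel.  For one byte:
 *   0xF0 -> 0xA0 -> 0x40 -> 0x04   (four bits set)
 * popcntb stops there, leaving each byte holding its own count, while
 * popcntw keeps folding up to word-sized lanes.
 */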
307
308 /*****************************************************************************/
309 /* PowerPC 601 specific instructions (POWER bridge) */
310 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
311 {
312 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
313
314 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
315 (int32_t)arg2 == 0) {
316 env->spr[SPR_MQ] = 0;
317 return INT32_MIN;
318 } else {
319 env->spr[SPR_MQ] = tmp % arg2;
320 return tmp / (int32_t)arg2;
321 }
322 }
323
324 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
325 target_ulong arg2)
326 {
327 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328
329 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
330 (int32_t)arg2 == 0) {
331 env->so = env->ov = 1;
332 env->spr[SPR_MQ] = 0;
333 return INT32_MIN;
334 } else {
335 env->spr[SPR_MQ] = tmp % arg2;
336 tmp /= (int32_t)arg2;
337 if ((int32_t)tmp != tmp) {
338 env->so = env->ov = 1;
339 } else {
340 env->ov = 0;
341 }
342 return tmp;
343 }
344 }
345
346 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
347 target_ulong arg2)
348 {
349 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
350 (int32_t)arg2 == 0) {
351 env->spr[SPR_MQ] = 0;
352 return INT32_MIN;
353 } else {
354 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
355 return (int32_t)arg1 / (int32_t)arg2;
356 }
357 }
358
359 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
360 target_ulong arg2)
361 {
362 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
363 (int32_t)arg2 == 0) {
364 env->so = env->ov = 1;
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->ov = 0;
369 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
370 return (int32_t)arg1 / (int32_t)arg2;
371 }
372 }
373
374 /*****************************************************************************/
375 /* 602 specific instructions */
376 /* mfrom is the craziest instruction ever seen, imho! */
377 /* The real implementation uses a ROM table; do the same. */
378 /* Extremely decomposed:
379  * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
380  */
382 #if !defined(CONFIG_USER_ONLY)
383 target_ulong helper_602_mfrom(target_ulong arg)
384 {
385 if (likely(arg < 602)) {
386 #include "mfrom_table.c"
387 return mfrom_ROM_table[arg];
388 } else {
389 return 0;
390 }
391 }
392 #endif
393
394 /*****************************************************************************/
395 /* Altivec extension helpers */
396 #if defined(HOST_WORDS_BIGENDIAN)
397 #define HI_IDX 0
398 #define LO_IDX 1
399 #else
400 #define HI_IDX 1
401 #define LO_IDX 0
402 #endif
403
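/* Editor's note: VECTOR_FOR_INORDER_I visits vector elements in
 * PowerPC (big-endian) element order regardless of host byte order.
 * ppc_avr_t is a host-endian union, so on a little-endian host the
 * architected element 0 lives at the highest array index and the
 * loop must run backwards.
 */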
404 #if defined(HOST_WORDS_BIGENDIAN)
405 #define VECTOR_FOR_INORDER_I(index, element) \
406 for (index = 0; index < ARRAY_SIZE(r->element); index++)
407 #else
408 #define VECTOR_FOR_INORDER_I(index, element) \
409 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
410 #endif
411
412 /* Saturating arithmetic helpers. */
413 #define SATCVT(from, to, from_type, to_type, min, max) \
414 static inline to_type cvt##from##to(from_type x, int *sat) \
415 { \
416 to_type r; \
417 \
418 if (x < (from_type)min) { \
419 r = min; \
420 *sat = 1; \
421 } else if (x > (from_type)max) { \
422 r = max; \
423 *sat = 1; \
424 } else { \
425 r = x; \
426 } \
427 return r; \
428 }
429 #define SATCVTU(from, to, from_type, to_type, min, max) \
430 static inline to_type cvt##from##to(from_type x, int *sat) \
431 { \
432 to_type r; \
433 \
434 if (x > (from_type)max) { \
435 r = max; \
436 *sat = 1; \
437 } else { \
438 r = x; \
439 } \
440 return r; \
441 }
442 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
443 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
444 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
445
446 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
447 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
448 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
449 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
450 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
451 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
452 #undef SATCVT
453 #undef SATCVTU
454
455 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
456 {
457 int i, j = (sh & 0xf);
458
459 VECTOR_FOR_INORDER_I(i, u8) {
460 r->u8[i] = j++;
461 }
462 }
463
464 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
465 {
466 int i, j = 0x10 - (sh & 0xf);
467
468 VECTOR_FOR_INORDER_I(i, u8) {
469 r->u8[i] = j++;
470 }
471 }
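/* Illustrative example (editor's addition): lvsl/lvsr build permute
 * control vectors for unaligned loads.  For sh = 3, lvsl yields the
 * bytes {0x03, 0x04, ..., 0x12} in element order; feeding that to
 * vperm on two adjacent aligned quadwords shifts the data left by
 * three bytes.
 */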
472
473 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
474 {
475 #if defined(HOST_WORDS_BIGENDIAN)
476 env->vscr = r->u32[3];
477 #else
478 env->vscr = r->u32[0];
479 #endif
480 set_flush_to_zero(vscr_nj, &env->vec_status);
481 }
482
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
484 {
485 int i;
486
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
489 }
490 }
491
492 #define VARITH_DO(name, op, element) \
493 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
494 { \
495 int i; \
496 \
497 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
498 r->element[i] = a->element[i] op b->element[i]; \
499 } \
500 }
501 #define VARITH(suffix, element) \
502 VARITH_DO(add##suffix, +, element) \
503 VARITH_DO(sub##suffix, -, element)
504 VARITH(ubm, u8)
505 VARITH(uhm, u16)
506 VARITH(uwm, u32)
507 VARITH(udm, u64)
508 VARITH_DO(muluwm, *, u32)
509 #undef VARITH_DO
510 #undef VARITH
511
512 #define VARITHFP(suffix, func) \
513 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
514 ppc_avr_t *b) \
515 { \
516 int i; \
517 \
518 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
519 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
520 } \
521 }
522 VARITHFP(addfp, float32_add)
523 VARITHFP(subfp, float32_sub)
524 VARITHFP(minfp, float32_min)
525 VARITHFP(maxfp, float32_max)
526 #undef VARITHFP
527
528 #define VARITHFPFMA(suffix, type) \
529 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
530 ppc_avr_t *b, ppc_avr_t *c) \
531 { \
532 int i; \
533 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
534 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
535 type, &env->vec_status); \
536 } \
537 }
538 VARITHFPFMA(maddfp, 0);
539 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
540 #undef VARITHFPFMA
541
542 #define VARITHSAT_CASE(type, op, cvt, element) \
543 { \
544 type result = (type)a->element[i] op (type)b->element[i]; \
545 r->element[i] = cvt(result, &sat); \
546 }
547
548 #define VARITHSAT_DO(name, op, optype, cvt, element) \
549 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
550 ppc_avr_t *b) \
551 { \
552 int sat = 0; \
553 int i; \
554 \
555 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
556 switch (sizeof(r->element[0])) { \
557 case 1: \
558 VARITHSAT_CASE(optype, op, cvt, element); \
559 break; \
560 case 2: \
561 VARITHSAT_CASE(optype, op, cvt, element); \
562 break; \
563 case 4: \
564 VARITHSAT_CASE(optype, op, cvt, element); \
565 break; \
566 } \
567 } \
568 if (sat) { \
569 env->vscr |= (1 << VSCR_SAT); \
570 } \
571 }
572 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
573 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
574 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
575 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
576 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
577 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
578 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
579 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
580 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
581 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
582 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
583 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
584 #undef VARITHSAT_CASE
585 #undef VARITHSAT_DO
586 #undef VARITHSAT_SIGNED
587 #undef VARITHSAT_UNSIGNED
588
589 #define VAVG_DO(name, element, etype) \
590 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
591 { \
592 int i; \
593 \
594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
595 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
596 r->element[i] = x >> 1; \
597 } \
598 }
599
600 #define VAVG(type, signed_element, signed_type, unsigned_element, \
601 unsigned_type) \
602 VAVG_DO(avgs##type, signed_element, signed_type) \
603 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
604 VAVG(b, s8, int16_t, u8, uint16_t)
605 VAVG(h, s16, int32_t, u16, uint32_t)
606 VAVG(w, s32, int64_t, u32, uint64_t)
607 #undef VAVG_DO
608 #undef VAVG
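/* Illustrative example (editor's addition): the VAVG helpers widen
 * each element before adding, so the +1 rounding bias cannot wrap:
 * vavgub with a = 0xFF, b = 0x01 computes (0xFF + 0x01 + 1) >> 1 =
 * 0x80 in uint16_t arithmetic, where 8-bit arithmetic would have
 * truncated the sum first.
 */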
609
610 #define VCF(suffix, cvt, element) \
611 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
612 ppc_avr_t *b, uint32_t uim) \
613 { \
614 int i; \
615 \
616 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
617 float32 t = cvt(b->element[i], &env->vec_status); \
618 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
619 } \
620 }
621 VCF(ux, uint32_to_float32, u32)
622 VCF(sx, int32_to_float32, s32)
623 #undef VCF
624
625 #define VCMP_DO(suffix, compare, element, record) \
626 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
627 ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 uint64_t ones = (uint64_t)-1; \
630 uint64_t all = ones; \
631 uint64_t none = 0; \
632 int i; \
633 \
634 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
635 uint64_t result = (a->element[i] compare b->element[i] ? \
636 ones : 0x0); \
637 switch (sizeof(a->element[0])) { \
638 case 8: \
639 r->u64[i] = result; \
640 break; \
641 case 4: \
642 r->u32[i] = result; \
643 break; \
644 case 2: \
645 r->u16[i] = result; \
646 break; \
647 case 1: \
648 r->u8[i] = result; \
649 break; \
650 } \
651 all &= result; \
652 none |= result; \
653 } \
654 if (record) { \
655 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
656 } \
657 }
658 #define VCMP(suffix, compare, element) \
659 VCMP_DO(suffix, compare, element, 0) \
660 VCMP_DO(suffix##_dot, compare, element, 1)
661 VCMP(equb, ==, u8)
662 VCMP(equh, ==, u16)
663 VCMP(equw, ==, u32)
664 VCMP(equd, ==, u64)
665 VCMP(gtub, >, u8)
666 VCMP(gtuh, >, u16)
667 VCMP(gtuw, >, u32)
668 VCMP(gtud, >, u64)
669 VCMP(gtsb, >, s8)
670 VCMP(gtsh, >, s16)
671 VCMP(gtsw, >, s32)
672 VCMP(gtsd, >, s64)
673 #undef VCMP_DO
674 #undef VCMP
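/* Editor's note: for the record ("dot") forms, CR field 6 summarizes
 * the comparison - 0x8 is set when the predicate held for every
 * element, 0x2 when it held for none, and a mixed result leaves both
 * clear.
 */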
675
676 #define VCMPFP_DO(suffix, compare, order, record) \
677 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
678 ppc_avr_t *a, ppc_avr_t *b) \
679 { \
680 uint32_t ones = (uint32_t)-1; \
681 uint32_t all = ones; \
682 uint32_t none = 0; \
683 int i; \
684 \
685 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
686 uint32_t result; \
687 int rel = float32_compare_quiet(a->f[i], b->f[i], \
688 &env->vec_status); \
689 if (rel == float_relation_unordered) { \
690 result = 0; \
691 } else if (rel compare order) { \
692 result = ones; \
693 } else { \
694 result = 0; \
695 } \
696 r->u32[i] = result; \
697 all &= result; \
698 none |= result; \
699 } \
700 if (record) { \
701 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
702 } \
703 }
704 #define VCMPFP(suffix, compare, order) \
705 VCMPFP_DO(suffix, compare, order, 0) \
706 VCMPFP_DO(suffix##_dot, compare, order, 1)
707 VCMPFP(eqfp, ==, float_relation_equal)
708 VCMPFP(gefp, !=, float_relation_less)
709 VCMPFP(gtfp, ==, float_relation_greater)
710 #undef VCMPFP_DO
711 #undef VCMPFP
712
713 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
714 ppc_avr_t *a, ppc_avr_t *b, int record)
715 {
716 int i;
717 int all_in = 0;
718
719 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
720 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
721 if (le_rel == float_relation_unordered) {
722 r->u32[i] = 0xc0000000;
723 /* ALL_IN does not need to be updated here. */
724 } else {
725 float32 bneg = float32_chs(b->f[i]);
726 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
727 int le = le_rel != float_relation_greater;
728 int ge = ge_rel != float_relation_less;
729
730 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
731 all_in |= (!le | !ge);
732 }
733 }
734 if (record) {
735 env->crf[6] = (all_in == 0) << 1;
736 }
737 }
738
739 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
740 {
741 vcmpbfp_internal(env, r, a, b, 0);
742 }
743
744 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
745 ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 1);
748 }
749
750 #define VCT(suffix, satcvt, element) \
751 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
752 ppc_avr_t *b, uint32_t uim) \
753 { \
754 int i; \
755 int sat = 0; \
756 float_status s = env->vec_status; \
757 \
758 set_float_rounding_mode(float_round_to_zero, &s); \
759 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
760 if (float32_is_any_nan(b->f[i])) { \
761 r->element[i] = 0; \
762 } else { \
763 float64 t = float32_to_float64(b->f[i], &s); \
764 int64_t j; \
765 \
766 t = float64_scalbn(t, uim, &s); \
767 j = float64_to_int64(t, &s); \
768 r->element[i] = satcvt(j, &sat); \
769 } \
770 } \
771 if (sat) { \
772 env->vscr |= (1 << VSCR_SAT); \
773 } \
774 }
775 VCT(uxs, cvtsduw, u32)
776 VCT(sxs, cvtsdsw, s32)
777 #undef VCT
778
779 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
780 ppc_avr_t *b, ppc_avr_t *c)
781 {
782 int sat = 0;
783 int i;
784
785 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
786 int32_t prod = a->s16[i] * b->s16[i];
787 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
788
789 r->s16[i] = cvtswsh(t, &sat);
790 }
791
792 if (sat) {
793 env->vscr |= (1 << VSCR_SAT);
794 }
795 }
796
797 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
798 ppc_avr_t *b, ppc_avr_t *c)
799 {
800 int sat = 0;
801 int i;
802
803 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
804 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
805 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
806 r->s16[i] = cvtswsh(t, &sat);
807 }
808
809 if (sat) {
810 env->vscr |= (1 << VSCR_SAT);
811 }
812 }
813
814 #define VMINMAX_DO(name, compare, element) \
815 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
816 { \
817 int i; \
818 \
819 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
820 if (a->element[i] compare b->element[i]) { \
821 r->element[i] = b->element[i]; \
822 } else { \
823 r->element[i] = a->element[i]; \
824 } \
825 } \
826 }
827 #define VMINMAX(suffix, element) \
828 VMINMAX_DO(min##suffix, >, element) \
829 VMINMAX_DO(max##suffix, <, element)
830 VMINMAX(sb, s8)
831 VMINMAX(sh, s16)
832 VMINMAX(sw, s32)
833 VMINMAX(sd, s64)
834 VMINMAX(ub, u8)
835 VMINMAX(uh, u16)
836 VMINMAX(uw, u32)
837 VMINMAX(ud, u64)
838 #undef VMINMAX_DO
839 #undef VMINMAX
840
841 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
842 {
843 int i;
844
845 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
846 int32_t prod = a->s16[i] * b->s16[i];
847 r->s16[i] = (int16_t) (prod + c->s16[i]);
848 }
849 }
850
851 #define VMRG_DO(name, element, highp) \
852 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
853 { \
854 ppc_avr_t result; \
855 int i; \
856 size_t n_elems = ARRAY_SIZE(r->element); \
857 \
858 for (i = 0; i < n_elems / 2; i++) { \
859 if (highp) { \
860 result.element[i*2+HI_IDX] = a->element[i]; \
861 result.element[i*2+LO_IDX] = b->element[i]; \
862 } else { \
863 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
864 b->element[n_elems - i - 1]; \
865 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
866 a->element[n_elems - i - 1]; \
867 } \
868 } \
869 *r = result; \
870 }
871 #if defined(HOST_WORDS_BIGENDIAN)
872 #define MRGHI 0
873 #define MRGLO 1
874 #else
875 #define MRGHI 1
876 #define MRGLO 0
877 #endif
878 #define VMRG(suffix, element) \
879 VMRG_DO(mrgl##suffix, element, MRGHI) \
880 VMRG_DO(mrgh##suffix, element, MRGLO)
881 VMRG(b, u8)
882 VMRG(h, u16)
883 VMRG(w, u32)
884 #undef VMRG_DO
885 #undef VMRG
886 #undef MRGHI
887 #undef MRGLO
888
889 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
890 ppc_avr_t *b, ppc_avr_t *c)
891 {
892 int32_t prod[16];
893 int i;
894
895 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
896 prod[i] = (int32_t)a->s8[i] * b->u8[i];
897 }
898
899 VECTOR_FOR_INORDER_I(i, s32) {
900 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
901 prod[4 * i + 2] + prod[4 * i + 3];
902 }
903 }
904
905 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
906 ppc_avr_t *b, ppc_avr_t *c)
907 {
908 int32_t prod[8];
909 int i;
910
911 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
912 prod[i] = a->s16[i] * b->s16[i];
913 }
914
915 VECTOR_FOR_INORDER_I(i, s32) {
916 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
917 }
918 }
919
920 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
921 ppc_avr_t *b, ppc_avr_t *c)
922 {
923 int32_t prod[8];
924 int i;
925 int sat = 0;
926
927 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
928 prod[i] = (int32_t)a->s16[i] * b->s16[i];
929 }
930
931 VECTOR_FOR_INORDER_I(i, s32) {
932 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
933
934 r->u32[i] = cvtsdsw(t, &sat);
935 }
936
937 if (sat) {
938 env->vscr |= (1 << VSCR_SAT);
939 }
940 }
941
942 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
943 ppc_avr_t *b, ppc_avr_t *c)
944 {
945 uint16_t prod[16];
946 int i;
947
948 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
949 prod[i] = a->u8[i] * b->u8[i];
950 }
951
952 VECTOR_FOR_INORDER_I(i, u32) {
953 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
954 prod[4 * i + 2] + prod[4 * i + 3];
955 }
956 }
957
958 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
959 ppc_avr_t *b, ppc_avr_t *c)
960 {
961 uint32_t prod[8];
962 int i;
963
964 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
965 prod[i] = a->u16[i] * b->u16[i];
966 }
967
968 VECTOR_FOR_INORDER_I(i, u32) {
969 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
970 }
971 }
972
973 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
974 ppc_avr_t *b, ppc_avr_t *c)
975 {
976 uint32_t prod[8];
977 int i;
978 int sat = 0;
979
980 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
981 prod[i] = a->u16[i] * b->u16[i];
982 }
983
984 VECTOR_FOR_INORDER_I(i, s32) {
985 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
986
987 r->u32[i] = cvtuduw(t, &sat);
988 }
989
990 if (sat) {
991 env->vscr |= (1 << VSCR_SAT);
992 }
993 }
994
995 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
996 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
997 { \
998 int i; \
999 \
1000 VECTOR_FOR_INORDER_I(i, prod_element) { \
1001 if (evenp) { \
1002 r->prod_element[i] = \
1003 (cast)a->mul_element[i * 2 + HI_IDX] * \
1004 (cast)b->mul_element[i * 2 + HI_IDX]; \
1005 } else { \
1006 r->prod_element[i] = \
1007 (cast)a->mul_element[i * 2 + LO_IDX] * \
1008 (cast)b->mul_element[i * 2 + LO_IDX]; \
1009 } \
1010 } \
1011 }
1012 #define VMUL(suffix, mul_element, prod_element, cast) \
1013 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1014 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1015 VMUL(sb, s8, s16, int16_t)
1016 VMUL(sh, s16, s32, int32_t)
1017 VMUL(sw, s32, s64, int64_t)
1018 VMUL(ub, u8, u16, uint16_t)
1019 VMUL(uh, u16, u32, uint32_t)
1020 VMUL(uw, u32, u64, uint64_t)
1021 #undef VMUL_DO
1022 #undef VMUL
1023
1024 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1025 ppc_avr_t *c)
1026 {
1027 ppc_avr_t result;
1028 int i;
1029
1030 VECTOR_FOR_INORDER_I(i, u8) {
1031 int s = c->u8[i] & 0x1f;
1032 #if defined(HOST_WORDS_BIGENDIAN)
1033 int index = s & 0xf;
1034 #else
1035 int index = 15 - (s & 0xf);
1036 #endif
1037
1038 if (s & 0x10) {
1039 result.u8[i] = b->u8[index];
1040 } else {
1041 result.u8[i] = a->u8[index];
1042 }
1043 }
1044 *r = result;
1045 }
1046
1047 #if defined(HOST_WORDS_BIGENDIAN)
1048 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1049 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1050 #else
1051 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1052 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1053 #endif
1054
1055 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1056 {
1057 int i;
1058 uint64_t perm = 0;
1059
1060 VECTOR_FOR_INORDER_I(i, u8) {
1061 int index = VBPERMQ_INDEX(b, i);
1062
1063 if (index < 128) {
1064 uint64_t mask = (1ull << (63-(index & 0x3F)));
1065 if (a->u64[VBPERMQ_DW(index)] & mask) {
1066 perm |= (0x8000 >> i);
1067 }
1068 }
1069 }
1070
1071 r->u64[HI_IDX] = perm;
1072 r->u64[LO_IDX] = 0;
1073 }
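/* Illustrative example (editor's addition): vbpermq gathers 16 bits
 * of VRA, selected by the first 16 byte indices of VRB, into the
 * low-order halfword of the result's high doubleword.  If VRB's
 * element 0 is 0, the 0x8000 bit of that halfword mirrors VRA's most
 * significant bit.
 */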
1074
1075 #undef VBPERMQ_INDEX
1076 #undef VBPERMQ_DW
1077
1078 static const uint64_t VGBBD_MASKS[256] = {
1079 0x0000000000000000ull, /* 00 */
1080 0x0000000000000080ull, /* 01 */
1081 0x0000000000008000ull, /* 02 */
1082 0x0000000000008080ull, /* 03 */
1083 0x0000000000800000ull, /* 04 */
1084 0x0000000000800080ull, /* 05 */
1085 0x0000000000808000ull, /* 06 */
1086 0x0000000000808080ull, /* 07 */
1087 0x0000000080000000ull, /* 08 */
1088 0x0000000080000080ull, /* 09 */
1089 0x0000000080008000ull, /* 0A */
1090 0x0000000080008080ull, /* 0B */
1091 0x0000000080800000ull, /* 0C */
1092 0x0000000080800080ull, /* 0D */
1093 0x0000000080808000ull, /* 0E */
1094 0x0000000080808080ull, /* 0F */
1095 0x0000008000000000ull, /* 10 */
1096 0x0000008000000080ull, /* 11 */
1097 0x0000008000008000ull, /* 12 */
1098 0x0000008000008080ull, /* 13 */
1099 0x0000008000800000ull, /* 14 */
1100 0x0000008000800080ull, /* 15 */
1101 0x0000008000808000ull, /* 16 */
1102 0x0000008000808080ull, /* 17 */
1103 0x0000008080000000ull, /* 18 */
1104 0x0000008080000080ull, /* 19 */
1105 0x0000008080008000ull, /* 1A */
1106 0x0000008080008080ull, /* 1B */
1107 0x0000008080800000ull, /* 1C */
1108 0x0000008080800080ull, /* 1D */
1109 0x0000008080808000ull, /* 1E */
1110 0x0000008080808080ull, /* 1F */
1111 0x0000800000000000ull, /* 20 */
1112 0x0000800000000080ull, /* 21 */
1113 0x0000800000008000ull, /* 22 */
1114 0x0000800000008080ull, /* 23 */
1115 0x0000800000800000ull, /* 24 */
1116 0x0000800000800080ull, /* 25 */
1117 0x0000800000808000ull, /* 26 */
1118 0x0000800000808080ull, /* 27 */
1119 0x0000800080000000ull, /* 28 */
1120 0x0000800080000080ull, /* 29 */
1121 0x0000800080008000ull, /* 2A */
1122 0x0000800080008080ull, /* 2B */
1123 0x0000800080800000ull, /* 2C */
1124 0x0000800080800080ull, /* 2D */
1125 0x0000800080808000ull, /* 2E */
1126 0x0000800080808080ull, /* 2F */
1127 0x0000808000000000ull, /* 30 */
1128 0x0000808000000080ull, /* 31 */
1129 0x0000808000008000ull, /* 32 */
1130 0x0000808000008080ull, /* 33 */
1131 0x0000808000800000ull, /* 34 */
1132 0x0000808000800080ull, /* 35 */
1133 0x0000808000808000ull, /* 36 */
1134 0x0000808000808080ull, /* 37 */
1135 0x0000808080000000ull, /* 38 */
1136 0x0000808080000080ull, /* 39 */
1137 0x0000808080008000ull, /* 3A */
1138 0x0000808080008080ull, /* 3B */
1139 0x0000808080800000ull, /* 3C */
1140 0x0000808080800080ull, /* 3D */
1141 0x0000808080808000ull, /* 3E */
1142 0x0000808080808080ull, /* 3F */
1143 0x0080000000000000ull, /* 40 */
1144 0x0080000000000080ull, /* 41 */
1145 0x0080000000008000ull, /* 42 */
1146 0x0080000000008080ull, /* 43 */
1147 0x0080000000800000ull, /* 44 */
1148 0x0080000000800080ull, /* 45 */
1149 0x0080000000808000ull, /* 46 */
1150 0x0080000000808080ull, /* 47 */
1151 0x0080000080000000ull, /* 48 */
1152 0x0080000080000080ull, /* 49 */
1153 0x0080000080008000ull, /* 4A */
1154 0x0080000080008080ull, /* 4B */
1155 0x0080000080800000ull, /* 4C */
1156 0x0080000080800080ull, /* 4D */
1157 0x0080000080808000ull, /* 4E */
1158 0x0080000080808080ull, /* 4F */
1159 0x0080008000000000ull, /* 50 */
1160 0x0080008000000080ull, /* 51 */
1161 0x0080008000008000ull, /* 52 */
1162 0x0080008000008080ull, /* 53 */
1163 0x0080008000800000ull, /* 54 */
1164 0x0080008000800080ull, /* 55 */
1165 0x0080008000808000ull, /* 56 */
1166 0x0080008000808080ull, /* 57 */
1167 0x0080008080000000ull, /* 58 */
1168 0x0080008080000080ull, /* 59 */
1169 0x0080008080008000ull, /* 5A */
1170 0x0080008080008080ull, /* 5B */
1171 0x0080008080800000ull, /* 5C */
1172 0x0080008080800080ull, /* 5D */
1173 0x0080008080808000ull, /* 5E */
1174 0x0080008080808080ull, /* 5F */
1175 0x0080800000000000ull, /* 60 */
1176 0x0080800000000080ull, /* 61 */
1177 0x0080800000008000ull, /* 62 */
1178 0x0080800000008080ull, /* 63 */
1179 0x0080800000800000ull, /* 64 */
1180 0x0080800000800080ull, /* 65 */
1181 0x0080800000808000ull, /* 66 */
1182 0x0080800000808080ull, /* 67 */
1183 0x0080800080000000ull, /* 68 */
1184 0x0080800080000080ull, /* 69 */
1185 0x0080800080008000ull, /* 6A */
1186 0x0080800080008080ull, /* 6B */
1187 0x0080800080800000ull, /* 6C */
1188 0x0080800080800080ull, /* 6D */
1189 0x0080800080808000ull, /* 6E */
1190 0x0080800080808080ull, /* 6F */
1191 0x0080808000000000ull, /* 70 */
1192 0x0080808000000080ull, /* 71 */
1193 0x0080808000008000ull, /* 72 */
1194 0x0080808000008080ull, /* 73 */
1195 0x0080808000800000ull, /* 74 */
1196 0x0080808000800080ull, /* 75 */
1197 0x0080808000808000ull, /* 76 */
1198 0x0080808000808080ull, /* 77 */
1199 0x0080808080000000ull, /* 78 */
1200 0x0080808080000080ull, /* 79 */
1201 0x0080808080008000ull, /* 7A */
1202 0x0080808080008080ull, /* 7B */
1203 0x0080808080800000ull, /* 7C */
1204 0x0080808080800080ull, /* 7D */
1205 0x0080808080808000ull, /* 7E */
1206 0x0080808080808080ull, /* 7F */
1207 0x8000000000000000ull, /* 80 */
1208 0x8000000000000080ull, /* 81 */
1209 0x8000000000008000ull, /* 82 */
1210 0x8000000000008080ull, /* 83 */
1211 0x8000000000800000ull, /* 84 */
1212 0x8000000000800080ull, /* 85 */
1213 0x8000000000808000ull, /* 86 */
1214 0x8000000000808080ull, /* 87 */
1215 0x8000000080000000ull, /* 88 */
1216 0x8000000080000080ull, /* 89 */
1217 0x8000000080008000ull, /* 8A */
1218 0x8000000080008080ull, /* 8B */
1219 0x8000000080800000ull, /* 8C */
1220 0x8000000080800080ull, /* 8D */
1221 0x8000000080808000ull, /* 8E */
1222 0x8000000080808080ull, /* 8F */
1223 0x8000008000000000ull, /* 90 */
1224 0x8000008000000080ull, /* 91 */
1225 0x8000008000008000ull, /* 92 */
1226 0x8000008000008080ull, /* 93 */
1227 0x8000008000800000ull, /* 94 */
1228 0x8000008000800080ull, /* 95 */
1229 0x8000008000808000ull, /* 96 */
1230 0x8000008000808080ull, /* 97 */
1231 0x8000008080000000ull, /* 98 */
1232 0x8000008080000080ull, /* 99 */
1233 0x8000008080008000ull, /* 9A */
1234 0x8000008080008080ull, /* 9B */
1235 0x8000008080800000ull, /* 9C */
1236 0x8000008080800080ull, /* 9D */
1237 0x8000008080808000ull, /* 9E */
1238 0x8000008080808080ull, /* 9F */
1239 0x8000800000000000ull, /* A0 */
1240 0x8000800000000080ull, /* A1 */
1241 0x8000800000008000ull, /* A2 */
1242 0x8000800000008080ull, /* A3 */
1243 0x8000800000800000ull, /* A4 */
1244 0x8000800000800080ull, /* A5 */
1245 0x8000800000808000ull, /* A6 */
1246 0x8000800000808080ull, /* A7 */
1247 0x8000800080000000ull, /* A8 */
1248 0x8000800080000080ull, /* A9 */
1249 0x8000800080008000ull, /* AA */
1250 0x8000800080008080ull, /* AB */
1251 0x8000800080800000ull, /* AC */
1252 0x8000800080800080ull, /* AD */
1253 0x8000800080808000ull, /* AE */
1254 0x8000800080808080ull, /* AF */
1255 0x8000808000000000ull, /* B0 */
1256 0x8000808000000080ull, /* B1 */
1257 0x8000808000008000ull, /* B2 */
1258 0x8000808000008080ull, /* B3 */
1259 0x8000808000800000ull, /* B4 */
1260 0x8000808000800080ull, /* B5 */
1261 0x8000808000808000ull, /* B6 */
1262 0x8000808000808080ull, /* B7 */
1263 0x8000808080000000ull, /* B8 */
1264 0x8000808080000080ull, /* B9 */
1265 0x8000808080008000ull, /* BA */
1266 0x8000808080008080ull, /* BB */
1267 0x8000808080800000ull, /* BC */
1268 0x8000808080800080ull, /* BD */
1269 0x8000808080808000ull, /* BE */
1270 0x8000808080808080ull, /* BF */
1271 0x8080000000000000ull, /* C0 */
1272 0x8080000000000080ull, /* C1 */
1273 0x8080000000008000ull, /* C2 */
1274 0x8080000000008080ull, /* C3 */
1275 0x8080000000800000ull, /* C4 */
1276 0x8080000000800080ull, /* C5 */
1277 0x8080000000808000ull, /* C6 */
1278 0x8080000000808080ull, /* C7 */
1279 0x8080000080000000ull, /* C8 */
1280 0x8080000080000080ull, /* C9 */
1281 0x8080000080008000ull, /* CA */
1282 0x8080000080008080ull, /* CB */
1283 0x8080000080800000ull, /* CC */
1284 0x8080000080800080ull, /* CD */
1285 0x8080000080808000ull, /* CE */
1286 0x8080000080808080ull, /* CF */
1287 0x8080008000000000ull, /* D0 */
1288 0x8080008000000080ull, /* D1 */
1289 0x8080008000008000ull, /* D2 */
1290 0x8080008000008080ull, /* D3 */
1291 0x8080008000800000ull, /* D4 */
1292 0x8080008000800080ull, /* D5 */
1293 0x8080008000808000ull, /* D6 */
1294 0x8080008000808080ull, /* D7 */
1295 0x8080008080000000ull, /* D8 */
1296 0x8080008080000080ull, /* D9 */
1297 0x8080008080008000ull, /* DA */
1298 0x8080008080008080ull, /* DB */
1299 0x8080008080800000ull, /* DC */
1300 0x8080008080800080ull, /* DD */
1301 0x8080008080808000ull, /* DE */
1302 0x8080008080808080ull, /* DF */
1303 0x8080800000000000ull, /* E0 */
1304 0x8080800000000080ull, /* E1 */
1305 0x8080800000008000ull, /* E2 */
1306 0x8080800000008080ull, /* E3 */
1307 0x8080800000800000ull, /* E4 */
1308 0x8080800000800080ull, /* E5 */
1309 0x8080800000808000ull, /* E6 */
1310 0x8080800000808080ull, /* E7 */
1311 0x8080800080000000ull, /* E8 */
1312 0x8080800080000080ull, /* E9 */
1313 0x8080800080008000ull, /* EA */
1314 0x8080800080008080ull, /* EB */
1315 0x8080800080800000ull, /* EC */
1316 0x8080800080800080ull, /* ED */
1317 0x8080800080808000ull, /* EE */
1318 0x8080800080808080ull, /* EF */
1319 0x8080808000000000ull, /* F0 */
1320 0x8080808000000080ull, /* F1 */
1321 0x8080808000008000ull, /* F2 */
1322 0x8080808000008080ull, /* F3 */
1323 0x8080808000800000ull, /* F4 */
1324 0x8080808000800080ull, /* F5 */
1325 0x8080808000808000ull, /* F6 */
1326 0x8080808000808080ull, /* F7 */
1327 0x8080808080000000ull, /* F8 */
1328 0x8080808080000080ull, /* F9 */
1329 0x8080808080008000ull, /* FA */
1330 0x8080808080008080ull, /* FB */
1331 0x8080808080800000ull, /* FC */
1332 0x8080808080800080ull, /* FD */
1333 0x8080808080808000ull, /* FE */
1334 0x8080808080808080ull, /* FF */
1335 };
1336
1337 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1338 {
1339 int i;
1340 uint64_t t[2] = { 0, 0 };
1341
1342 VECTOR_FOR_INORDER_I(i, u8) {
1343 #if defined(HOST_WORDS_BIGENDIAN)
1344 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1345 #else
1346 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1347 #endif
1348 }
1349
1350 r->u64[0] = t[0];
1351 r->u64[1] = t[1];
1352 }
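/* Editor's note: vgbbd transposes each doubleword viewed as an 8x8
 * bit matrix - in big-endian numbering, bit j of source byte i lands
 * in bit i of result byte j.  VGBBD_MASKS[v] pre-spreads byte v so
 * its bits fall one per result byte, and the shift by the byte's
 * position selects the matching column.
 */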
1353
1354 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1355 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1356 { \
1357 int i, j; \
1358 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1359 \
1360 VECTOR_FOR_INORDER_I(i, srcfld) { \
1361 prod[i] = 0; \
1362 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1363 if (a->srcfld[i] & (1ull<<j)) { \
1364 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1365 } \
1366 } \
1367 } \
1368 \
1369 VECTOR_FOR_INORDER_I(i, trgfld) { \
1370 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1371 } \
1372 }
1373
1374 PMSUM(vpmsumb, u8, u16, uint16_t)
1375 PMSUM(vpmsumh, u16, u32, uint32_t)
1376 PMSUM(vpmsumw, u32, u64, uint64_t)
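/* Illustrative example (editor's addition): each partial product here
 * is a carry-less (GF(2) polynomial) multiplication - shift-and-XOR
 * rather than shift-and-add.  E.g. 0b0011 x 0b0101 = 0b1111, matching
 * (x+1)(x^2+1) = x^3+x^2+x+1, while 0b0011 x 0b0011 = 0b0101 (not 9)
 * because the cross terms cancel under XOR.  Adjacent even/odd
 * products are then XOR-folded into the wider target element.
 */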
1377
1378 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1379 {
1381 #ifdef CONFIG_INT128
1382 int i, j;
1383 __uint128_t prod[2];
1384
1385 VECTOR_FOR_INORDER_I(i, u64) {
1386 prod[i] = 0;
1387 for (j = 0; j < 64; j++) {
1388 if (a->u64[i] & (1ull<<j)) {
1389 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1390 }
1391 }
1392 }
1393
1394 r->u128 = prod[0] ^ prod[1];
1395
1396 #else
1397 int i, j;
1398 ppc_avr_t prod[2];
1399
1400 VECTOR_FOR_INORDER_I(i, u64) {
1401 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1402 for (j = 0; j < 64; j++) {
1403 if (a->u64[i] & (1ull<<j)) {
1404 ppc_avr_t bshift;
1405 if (j == 0) {
1406 bshift.u64[HI_IDX] = 0;
1407 bshift.u64[LO_IDX] = b->u64[i];
1408 } else {
1409 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1410 bshift.u64[LO_IDX] = b->u64[i] << j;
1411 }
1412 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1413 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1414 }
1415 }
1416 }
1417
1418 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1419 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1420 #endif
1421 }
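/* Editor's note: in the !CONFIG_INT128 path above, bshift holds
 * b->u64[i] shifted left by j across the 64-bit boundary; the j == 0
 * special case avoids the undefined 64-bit shift by (64 - j) == 64.
 */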
1422
1424 #if defined(HOST_WORDS_BIGENDIAN)
1425 #define PKBIG 1
1426 #else
1427 #define PKBIG 0
1428 #endif
1429 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1430 {
1431 int i, j;
1432 ppc_avr_t result;
1433 #if defined(HOST_WORDS_BIGENDIAN)
1434 const ppc_avr_t *x[2] = { a, b };
1435 #else
1436 const ppc_avr_t *x[2] = { b, a };
1437 #endif
1438
1439 VECTOR_FOR_INORDER_I(i, u64) {
1440 VECTOR_FOR_INORDER_I(j, u32) {
1441 uint32_t e = x[i]->u32[j];
1442
1443 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1444 ((e >> 6) & 0x3e0) |
1445 ((e >> 3) & 0x1f));
1446 }
1447 }
1448 *r = result;
1449 }
1450
1451 #define VPK(suffix, from, to, cvt, dosat) \
1452 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1453 ppc_avr_t *a, ppc_avr_t *b) \
1454 { \
1455 int i; \
1456 int sat = 0; \
1457 ppc_avr_t result; \
1458 ppc_avr_t *a0 = PKBIG ? a : b; \
1459 ppc_avr_t *a1 = PKBIG ? b : a; \
1460 \
1461 VECTOR_FOR_INORDER_I(i, from) { \
1462 result.to[i] = cvt(a0->from[i], &sat); \
1463 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1464 } \
1465 *r = result; \
1466 if (dosat && sat) { \
1467 env->vscr |= (1 << VSCR_SAT); \
1468 } \
1469 }
1470 #define I(x, y) (x)
1471 VPK(shss, s16, s8, cvtshsb, 1)
1472 VPK(shus, s16, u8, cvtshub, 1)
1473 VPK(swss, s32, s16, cvtswsh, 1)
1474 VPK(swus, s32, u16, cvtswuh, 1)
1475 VPK(sdss, s64, s32, cvtsdsw, 1)
1476 VPK(sdus, s64, u32, cvtsduw, 1)
1477 VPK(uhus, u16, u8, cvtuhub, 1)
1478 VPK(uwus, u32, u16, cvtuwuh, 1)
1479 VPK(udus, u64, u32, cvtuduw, 1)
1480 VPK(uhum, u16, u8, I, 0)
1481 VPK(uwum, u32, u16, I, 0)
1482 VPK(udum, u64, u32, I, 0)
1483 #undef I
1484 #undef VPK
1485 #undef PKBIG
1486
1487 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1488 {
1489 int i;
1490
1491 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1492 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1493 }
1494 }
1495
1496 #define VRFI(suffix, rounding) \
1497 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1498 ppc_avr_t *b) \
1499 { \
1500 int i; \
1501 float_status s = env->vec_status; \
1502 \
1503 set_float_rounding_mode(rounding, &s); \
1504 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1505 r->f[i] = float32_round_to_int (b->f[i], &s); \
1506 } \
1507 }
1508 VRFI(n, float_round_nearest_even)
1509 VRFI(m, float_round_down)
1510 VRFI(p, float_round_up)
1511 VRFI(z, float_round_to_zero)
1512 #undef VRFI
1513
1514 #define VROTATE(suffix, element, mask) \
1515 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1516 { \
1517 int i; \
1518 \
1519 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1520 unsigned int shift = b->element[i] & mask; \
1521 r->element[i] = (a->element[i] << shift) | \
1522 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1523 } \
1524 }
1525 VROTATE(b, u8, 0x7)
1526 VROTATE(h, u16, 0xF)
1527 VROTATE(w, u32, 0x1F)
1528 VROTATE(d, u64, 0x3F)
1529 #undef VROTATE
1530
1531 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1532 {
1533 int i;
1534
1535 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1536 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1537
1538 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1539 }
1540 }
1541
1542 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1543 ppc_avr_t *c)
1544 {
1545 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1546 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1547 }
1548
1549 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1550 {
1551 int i;
1552
1553 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1554 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1555 }
1556 }
1557
1558 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1559 {
1560 int i;
1561
1562 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1563 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1564 }
1565 }
1566
1567 #if defined(HOST_WORDS_BIGENDIAN)
1568 #define LEFT 0
1569 #define RIGHT 1
1570 #else
1571 #define LEFT 1
1572 #define RIGHT 0
1573 #endif
1574 /* The specification says that the results are undefined if all of the
1575 * shift counts are not identical. We check that they are, to conform
1576 * to what real hardware appears to do. */
1577 #define VSHIFT(suffix, leftp) \
1578 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1579 { \
1580 int shift = b->u8[LO_IDX*15] & 0x7; \
1581 int doit = 1; \
1582 int i; \
1583 \
1584 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1585 doit = doit && ((b->u8[i] & 0x7) == shift); \
1586 } \
1587 if (doit) { \
1588 if (shift == 0) { \
1589 *r = *a; \
1590 } else if (leftp) { \
1591 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1592 \
1593 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1594 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1595 } else { \
1596 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1597 \
1598 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1599 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1600 } \
1601 } \
1602 }
1603 VSHIFT(l, LEFT)
1604 VSHIFT(r, RIGHT)
1605 #undef VSHIFT
1606 #undef LEFT
1607 #undef RIGHT
1608
1609 #define VSL(suffix, element, mask) \
1610 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1611 { \
1612 int i; \
1613 \
1614 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1615 unsigned int shift = b->element[i] & mask; \
1616 \
1617 r->element[i] = a->element[i] << shift; \
1618 } \
1619 }
1620 VSL(b, u8, 0x7)
1621 VSL(h, u16, 0x0F)
1622 VSL(w, u32, 0x1F)
1623 VSL(d, u64, 0x3F)
1624 #undef VSL
1625
1626 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1627 {
1628 int sh = shift & 0xf;
1629 int i;
1630 ppc_avr_t result;
1631
1632 #if defined(HOST_WORDS_BIGENDIAN)
1633 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1634 int index = sh + i;
1635 if (index > 0xf) {
1636 result.u8[i] = b->u8[index - 0x10];
1637 } else {
1638 result.u8[i] = a->u8[index];
1639 }
1640 }
1641 #else
1642 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1643 int index = (16 - sh) + i;
1644 if (index > 0xf) {
1645 result.u8[i] = a->u8[index - 0x10];
1646 } else {
1647 result.u8[i] = b->u8[index];
1648 }
1649 }
1650 #endif
1651 *r = result;
1652 }
1653
1654 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1655 {
1656 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1657
1658 #if defined(HOST_WORDS_BIGENDIAN)
1659 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1660 memset(&r->u8[16-sh], 0, sh);
1661 #else
1662 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1663 memset(&r->u8[0], 0, sh);
1664 #endif
1665 }
1666
1667 /* Experimental testing shows that hardware masks the immediate. */
1668 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1669 #if defined(HOST_WORDS_BIGENDIAN)
1670 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1671 #else
1672 #define SPLAT_ELEMENT(element) \
1673 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1674 #endif
1675 #define VSPLT(suffix, element) \
1676 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1677 { \
1678 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1679 int i; \
1680 \
1681 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1682 r->element[i] = s; \
1683 } \
1684 }
1685 VSPLT(b, u8)
1686 VSPLT(h, u16)
1687 VSPLT(w, u32)
1688 #undef VSPLT
1689 #undef SPLAT_ELEMENT
1690 #undef _SPLAT_MASKED
1691
1692 #define VSPLTI(suffix, element, splat_type) \
1693 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1694 { \
1695 splat_type x = (int8_t)(splat << 3) >> 3; \
1696 int i; \
1697 \
1698 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1699 r->element[i] = x; \
1700 } \
1701 }
1702 VSPLTI(b, s8, int8_t)
1703 VSPLTI(h, s16, int16_t)
1704 VSPLTI(w, s32, int32_t)
1705 #undef VSPLTI
1706
1707 #define VSR(suffix, element, mask) \
1708 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1709 { \
1710 int i; \
1711 \
1712 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1713 unsigned int shift = b->element[i] & mask; \
1714 r->element[i] = a->element[i] >> shift; \
1715 } \
1716 }
1717 VSR(ab, s8, 0x7)
1718 VSR(ah, s16, 0xF)
1719 VSR(aw, s32, 0x1F)
1720 VSR(ad, s64, 0x3F)
1721 VSR(b, u8, 0x7)
1722 VSR(h, u16, 0xF)
1723 VSR(w, u32, 0x1F)
1724 VSR(d, u64, 0x3F)
1725 #undef VSR
1726
1727 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1728 {
1729 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1730
1731 #if defined(HOST_WORDS_BIGENDIAN)
1732 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1733 memset(&r->u8[0], 0, sh);
1734 #else
1735 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1736 memset(&r->u8[16 - sh], 0, sh);
1737 #endif
1738 }
1739
1740 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1741 {
1742 int i;
1743
1744 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1745 r->u32[i] = a->u32[i] >= b->u32[i];
1746 }
1747 }
1748
1749 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1750 {
1751 int64_t t;
1752 int i, upper;
1753 ppc_avr_t result;
1754 int sat = 0;
1755
1756 #if defined(HOST_WORDS_BIGENDIAN)
1757 upper = ARRAY_SIZE(r->s32)-1;
1758 #else
1759 upper = 0;
1760 #endif
1761 t = (int64_t)b->s32[upper];
1762 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1763 t += a->s32[i];
1764 result.s32[i] = 0;
1765 }
1766 result.s32[upper] = cvtsdsw(t, &sat);
1767 *r = result;
1768
1769 if (sat) {
1770 env->vscr |= (1 << VSCR_SAT);
1771 }
1772 }
1773
1774 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1775 {
1776 int i, j, upper;
1777 ppc_avr_t result;
1778 int sat = 0;
1779
1780 #if defined(HOST_WORDS_BIGENDIAN)
1781 upper = 1;
1782 #else
1783 upper = 0;
1784 #endif
1785 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1786 int64_t t = (int64_t)b->s32[upper + i * 2];
1787
1788 result.u64[i] = 0;
1789 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1790 t += a->s32[2 * i + j];
1791 }
1792 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1793 }
1794
1795 *r = result;
1796 if (sat) {
1797 env->vscr |= (1 << VSCR_SAT);
1798 }
1799 }
1800
1801 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1802 {
1803 int i, j;
1804 int sat = 0;
1805
1806 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1807 int64_t t = (int64_t)b->s32[i];
1808
1809 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1810 t += a->s8[4 * i + j];
1811 }
1812 r->s32[i] = cvtsdsw(t, &sat);
1813 }
1814
1815 if (sat) {
1816 env->vscr |= (1 << VSCR_SAT);
1817 }
1818 }
1819
1820 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1821 {
1822 int sat = 0;
1823 int i;
1824
1825 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1826 int64_t t = (int64_t)b->s32[i];
1827
1828 t += a->s16[2 * i] + a->s16[2 * i + 1];
1829 r->s32[i] = cvtsdsw(t, &sat);
1830 }
1831
1832 if (sat) {
1833 env->vscr |= (1 << VSCR_SAT);
1834 }
1835 }
1836
1837 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1838 {
1839 int i, j;
1840 int sat = 0;
1841
1842 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1843 uint64_t t = (uint64_t)b->u32[i];
1844
1845 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1846 t += a->u8[4 * i + j];
1847 }
1848 r->u32[i] = cvtuduw(t, &sat);
1849 }
1850
1851 if (sat) {
1852 env->vscr |= (1 << VSCR_SAT);
1853 }
1854 }
1855
1856 #if defined(HOST_WORDS_BIGENDIAN)
1857 #define UPKHI 1
1858 #define UPKLO 0
1859 #else
1860 #define UPKHI 0
1861 #define UPKLO 1
1862 #endif
1863 #define VUPKPX(suffix, hi) \
1864 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1865 { \
1866 int i; \
1867 ppc_avr_t result; \
1868 \
1869 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1870 uint16_t e = b->u16[hi ? i : i+4]; \
1871 uint8_t a = (e >> 15) ? 0xff : 0; \
1872 uint8_t r = (e >> 10) & 0x1f; \
1873 uint8_t g = (e >> 5) & 0x1f; \
1874 uint8_t b = e & 0x1f; \
1875 \
1876 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1877 } \
1878 *r = result; \
1879 }
1880 VUPKPX(lpx, UPKLO)
1881 VUPKPX(hpx, UPKHI)
1882 #undef VUPKPX
1883
1884 #define VUPK(suffix, unpacked, packee, hi) \
1885 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1886 { \
1887 int i; \
1888 ppc_avr_t result; \
1889 \
1890 if (hi) { \
1891 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1892 result.unpacked[i] = b->packee[i]; \
1893 } \
1894 } else { \
1895 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1896 i++) { \
1897 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1898 } \
1899 } \
1900 *r = result; \
1901 }
1902 VUPK(hsb, s16, s8, UPKHI)
1903 VUPK(hsh, s32, s16, UPKHI)
1904 VUPK(hsw, s64, s32, UPKHI)
1905 VUPK(lsb, s16, s8, UPKLO)
1906 VUPK(lsh, s32, s16, UPKLO)
1907 VUPK(lsw, s64, s32, UPKLO)
1908 #undef VUPK
1909 #undef UPKHI
1910 #undef UPKLO
1911
1912 #define VGENERIC_DO(name, element) \
1913 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1914 { \
1915 int i; \
1916 \
1917 VECTOR_FOR_INORDER_I(i, element) { \
1918 r->element[i] = name(b->element[i]); \
1919 } \
1920 }
1921
1922 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1923 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1924 #define clzw(v) clz32((v))
1925 #define clzd(v) clz64((v))
1926
1927 VGENERIC_DO(clzb, u8)
1928 VGENERIC_DO(clzh, u16)
1929 VGENERIC_DO(clzw, u32)
1930 VGENERIC_DO(clzd, u64)
1931
1932 #undef clzb
1933 #undef clzh
1934 #undef clzw
1935 #undef clzd
1936
1937 #define popcntb(v) ctpop8(v)
1938 #define popcnth(v) ctpop16(v)
1939 #define popcntw(v) ctpop32(v)
1940 #define popcntd(v) ctpop64(v)
1941
1942 VGENERIC_DO(popcntb, u8)
1943 VGENERIC_DO(popcnth, u16)
1944 VGENERIC_DO(popcntw, u32)
1945 VGENERIC_DO(popcntd, u64)
1946
1947 #undef popcntb
1948 #undef popcnth
1949 #undef popcntw
1950 #undef popcntd
1951
1952 #undef VGENERIC_DO
1953
1954 #if defined(HOST_WORDS_BIGENDIAN)
1955 #define QW_ONE { .u64 = { 0, 1 } }
1956 #else
1957 #define QW_ONE { .u64 = { 1, 0 } }
1958 #endif
1959
1960 #ifndef CONFIG_INT128
1961
1962 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1963 {
1964 t->u64[0] = ~a.u64[0];
1965 t->u64[1] = ~a.u64[1];
1966 }
1967
1968 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1969 {
1970 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1971 return -1;
1972 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1973 return 1;
1974 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1975 return -1;
1976 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1977 return 1;
1978 } else {
1979 return 0;
1980 }
1981 }
1982
1983 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1984 {
1985 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1986 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1987 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1988 }
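/* Editor's note: the (~a.u64[LO_IDX] < b.u64[LO_IDX]) idiom above
 * detects carry out of the low doubleword without a wider type:
 * a + b wraps 64 bits exactly when b exceeds the remaining headroom
 * UINT64_MAX - a, which is ~a.
 */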
1989
1990 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1991 {
1992 ppc_avr_t not_a;
1993 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1994 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1995 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1996 avr_qw_not(&not_a, a);
1997 return avr_qw_cmpu(not_a, b) < 0;
1998 }
1999
2000 #endif
2001
2002 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2003 {
2004 #ifdef CONFIG_INT128
2005 r->u128 = a->u128 + b->u128;
2006 #else
2007 avr_qw_add(r, *a, *b);
2008 #endif
2009 }
2010
2011 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2012 {
2013 #ifdef CONFIG_INT128
2014 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2015 #else
2016
2017 if (c->u64[LO_IDX] & 1) {
2018 ppc_avr_t tmp;
2019
2020 tmp.u64[HI_IDX] = 0;
2021 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2022 avr_qw_add(&tmp, *a, tmp);
2023 avr_qw_add(r, tmp, *b);
2024 } else {
2025 avr_qw_add(r, *a, *b);
2026 }
2027 #endif
2028 }
2029
2030 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2031 {
2032 #ifdef CONFIG_INT128
2033 r->u128 = (~a->u128 < b->u128);
2034 #else
2035 ppc_avr_t not_a;
2036
2037 avr_qw_not(&not_a, *a);
2038
2039 r->u64[HI_IDX] = 0;
2040 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2041 #endif
2042 }
2043
2044 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2045 {
2046 #ifdef CONFIG_INT128
2047 int carry_out = (~a->u128 < b->u128);
2048 if (!carry_out && (c->u128 & 1)) {
2049 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2050 ((a->u128 != 0) || (b->u128 != 0));
2051 }
2052 r->u128 = carry_out;
2053 #else
2054
2055 int carry_in = c->u64[LO_IDX] & 1;
2056 int carry_out = 0;
2057 ppc_avr_t tmp;
2058
2059 carry_out = avr_qw_addc(&tmp, *a, *b);
2060
2061 if (!carry_out && carry_in) {
2062 ppc_avr_t one = QW_ONE;
2063 carry_out = avr_qw_addc(&tmp, tmp, one);
2064 }
2065 r->u64[HI_IDX] = 0;
2066 r->u64[LO_IDX] = carry_out;
2067 #endif
2068 }
2069
2070 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2071 {
2072 #ifdef CONFIG_INT128
2073 r->u128 = a->u128 - b->u128;
2074 #else
2075 ppc_avr_t tmp;
2076 ppc_avr_t one = QW_ONE;
2077
2078 avr_qw_not(&tmp, *b);
2079 avr_qw_add(&tmp, *a, tmp);
2080 avr_qw_add(r, tmp, one);
2081 #endif
2082 }
2083
2084 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2085 {
2086 #ifdef CONFIG_INT128
2087 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2088 #else
2089 ppc_avr_t tmp, sum;
2090
2091 avr_qw_not(&tmp, *b);
2092 avr_qw_add(&sum, *a, tmp);
2093
2094 tmp.u64[HI_IDX] = 0;
2095 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2096 avr_qw_add(r, sum, tmp);
2097 #endif
2098 }
2099
2100 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2101 {
2102 #ifdef CONFIG_INT128
2103 r->u128 = (~a->u128 < ~b->u128) ||
2104 (a->u128 + ~b->u128 == (__uint128_t)-1);
2105 #else
2106 int carry = (avr_qw_cmpu(*a, *b) > 0);
2107 if (!carry) {
2108 ppc_avr_t tmp;
2109 avr_qw_not(&tmp, *b);
2110 avr_qw_add(&tmp, *a, tmp);
2111 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2112 }
2113 r->u64[HI_IDX] = 0;
2114 r->u64[LO_IDX] = carry;
2115 #endif
2116 }
2117
2118 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2119 {
2120 #ifdef CONFIG_INT128
2121 r->u128 =
2122 (~a->u128 < ~b->u128) ||
2123 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2124 #else
2125 int carry_in = c->u64[LO_IDX] & 1;
2126 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2127 if (!carry_out && carry_in) {
2128 ppc_avr_t tmp;
2129 avr_qw_not(&tmp, *b);
2130 avr_qw_add(&tmp, *a, tmp);
2131 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2132 }
2133
2134 r->u64[HI_IDX] = 0;
2135 r->u64[LO_IDX] = carry_out;
2136 #endif
2137 }
2138
2139
2140 #undef VECTOR_FOR_INORDER_I
2141 #undef HI_IDX
2142 #undef LO_IDX
2143
2144 /*****************************************************************************/
2145 /* SPE extension helpers */
2146 /* Use a table to make this quicker */
2147 static const uint8_t hbrev[16] = {
2148 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2149 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2150 };
2151
2152 static inline uint8_t byte_reverse(uint8_t val)
2153 {
2154 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2155 }
2156
2157 static inline uint32_t word_reverse(uint32_t val)
2158 {
2159 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2160 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2161 }
2162
2163 #define MASKBITS 16 /* Arbitrary value - implementation dependent, to be fixed */
2164 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2165 {
2166 uint32_t a, b, d, mask;
2167
2168 mask = UINT32_MAX >> (32 - MASKBITS);
2169 a = arg1 & mask;
2170 b = arg2 & mask;
2171 d = word_reverse(1 + word_reverse(a | ~b));
2172 return (arg1 & ~mask) | (d & b);
2173 }
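/* Illustrative example (editor's addition): brinc is the
 * bit-reversed increment used for FFT butterfly addressing -
 * reverse the masked bits, add one, reverse back.  With arg2
 * supplying a 3-bit mask, successive results starting from 0 step
 * through 0, 4, 2, 6, 1, 5, 3, 7.
 */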
2174
2175 uint32_t helper_cntlsw32(uint32_t val)
2176 {
2177 if (val & 0x80000000) {
2178 return clz32(~val);
2179 } else {
2180 return clz32(val);
2181 }
2182 }
2183
2184 uint32_t helper_cntlzw32(uint32_t val)
2185 {
2186 return clz32(val);
2187 }
2188
2189 /* 440 specific */
2190 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2191 target_ulong low, uint32_t update_Rc)
2192 {
2193 target_ulong mask;
2194 int i;
2195
2196 i = 1;
2197 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2198 if ((high & mask) == 0) {
2199 if (update_Rc) {
2200 env->crf[0] = 0x4;
2201 }
2202 goto done;
2203 }
2204 i++;
2205 }
2206 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2207 if ((low & mask) == 0) {
2208 if (update_Rc) {
2209 env->crf[0] = 0x8;
2210 }
2211 goto done;
2212 }
2213 i++;
2214 }
2215 if (update_Rc) {
2216 env->crf[0] = 0x2;
2217 }
2218 done:
2219 env->xer = (env->xer & ~0x7F) | i;
2220 if (update_Rc) {
2221 env->crf[0] |= xer_so;
2222 }
2223 return i;
2224 }
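/* Editor's note: dlmzb reports the 1-based position of the leftmost
 * zero byte in the 8-byte string high:low, e.g. high = 0x00414243
 * gives i = 1, and a first zero in low's second byte gives i = 6.
 * The count also lands in XER[57:63]; with update_Rc, CR0 encodes
 * where the zero was found (0x4 in high, 0x8 in low, 0x2 not found).
 */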