/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "dyngen-exec.h"
#include "host-utils.h"
#include "helper.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

/* multiply high word */
uint64_t helper_mulhd(uint64_t arg1, uint64_t arg2)
{
    uint64_t tl, th;

    muls64(&tl, &th, arg1, arg2);
    return th;
}

/* multiply high word unsigned */
uint64_t helper_mulhdu(uint64_t arg1, uint64_t arg2)
{
    uint64_t tl, th;

    mulu64(&tl, &th, arg1, arg2);
    return th;
}

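/* multiply low doubleword, recording overflow in XER[OV] and XER[SO] */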
uint64_t helper_mulldo(uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* If th != 0 && th != -1, then we had an overflow */
    if (likely((uint64_t)(th + 1) <= 1)) {
        env->xer &= ~(1 << XER_OV);
    } else {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
    }
    return (int64_t)tl;
}
#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

/* shift right arithmetic helper */
target_ulong helper_sraw(target_ulong value, target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->xer &= ~(1 << XER_CA);
            } else {
                env->xer |= (1 << XER_CA);
            }
        } else {
            ret = (int32_t)value;
            env->xer &= ~(1 << XER_CA);
        }
    } else {
        ret = (int32_t)value >> 31;
        if (ret) {
            env->xer |= (1 << XER_CA);
        } else {
            env->xer &= ~(1 << XER_CA);
        }
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(target_ulong value, target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->xer &= ~(1 << XER_CA);
            } else {
                env->xer |= (1 << XER_CA);
            }
        } else {
            ret = (int64_t)value;
            env->xer &= ~(1 << XER_CA);
        }
    } else {
        ret = (int64_t)value >> 63;
        if (ret) {
            env->xer |= (1 << XER_CA);
        } else {
            env->xer &= ~(1 << XER_CA);
        }
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
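/* Classic SWAR population count: each step adds adjacent 1-, 2-, then
 * 4-bit field pairs in parallel, leaving a per-byte bit count (popcntb
 * stops there; popcntw keeps folding to per-word counts).  For example,
 * the byte 0xd6 becomes 0x95, then 0x32, then 0x05: five set bits.
 */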
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->xer |= (1 << XER_OV) | (1 << XER_SO);
        } else {
            env->xer &= ~(1 << XER_OV);
        }
        return tmp;
    }
}

target_ulong helper_divs(target_ulong arg1, target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(target_ulong arg1, target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->xer &= ~(1 << XER_OV);
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* Real implementation uses a ROM table.  Do the same. */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

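/* Iterate over the elements of a vector register in PowerPC (big-endian)
 * element order, whatever the host's endianness.  On a little-endian host
 * the ppc_avr_t arrays are stored reversed, so the loop runs backwards.
 */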
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
 * execute the following block. */
#define DO_HANDLE_NAN(result, x)                        \
    if (float32_is_any_nan(x)) {                        \
        CPU_FloatU __f;                                 \
        __f.f = x;                                      \
        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
        result = __f.f;                                 \
    } else

#define HANDLE_NAN1(result, x)                  \
    DO_HANDLE_NAN(result, x)
#define HANDLE_NAN2(result, x, y)               \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
#define HANDLE_NAN3(result, x, y, z)            \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
        *sat = 1;                                               \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
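/* Each instantiation defines a clamping conversion; e.g. cvtshsb()
 * narrows int16_t to int8_t, so cvtshsb(300, &sat) returns 127 and
 * sets *sat, which the callers fold into VSCR[SAT].
 */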
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

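/* lvsl/lvsr generate the permute control vectors used with vperm to
 * realign unaligned loads: lvsl fills the result with sh, sh+1, ...,
 * sh+15 and lvsr with 16-sh, ..., 31-sh, where sh is the low nibble
 * of the effective address.
 */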
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

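/* Carry-out of each 32-bit addition: a + b produces a carry exactly
 * when b > ~a, i.e. when b > 0xffffffff - a.
 */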
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
            }                                                           \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
#undef VARITHFP

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

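/* Average with round-half-up: compute (a + b + 1) >> 1 in a type twice
 * as wide as the elements so the intermediate sum cannot wrap.
 */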
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t uim)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

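/* The record (dot) forms also set CR field 6: bit 3 if the comparison
 * was true for every element, bit 1 if it was true for none.
 */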
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint32_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
#undef VCMP_DO
#undef VCMP

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                                    int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here.  */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(r, a, b, 0);
}

void helper_vcmpbfp_dot(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t uim)   \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

void helper_vmaddfp(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end.  */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_add(t, bf, &env->vec_status);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}

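/* vmhaddshs keeps the high 17 bits of each 32-bit product (prod >> 15),
 * adds the corresponding element of c, and saturates to 16 bits; the
 * rounded variant below adds 0x4000 before shifting.
 */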
void helper_vmhaddshs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
#undef VMINMAX_DO
#undef VMINMAX

#define VMINMAXFP(suffix, rT, rF)                                       \
    void helper_v##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
                if (float32_lt_quiet(a->f[i], b->f[i],                  \
                                     &env->vec_status)) {               \
                    r->f[i] = rT->f[i];                                 \
                } else {                                                \
                    r->f[i] = rF->f[i];                                 \
                }                                                       \
            }                                                           \
        }                                                               \
    }
VMINMAXFP(minfp, a, b)
VMINMAXFP(maxfp, b, a)
#undef VMINMAXFP

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

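/* vmule* and vmulo* multiply the even- and odd-numbered elements (in
 * PowerPC element order) into results twice as wide; the HI_IDX/LO_IDX
 * offsets pick the right member of each pair on both host endiannesses.
 */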
#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
                    b->mul_element[i * 2 + HI_IDX];                     \
            } else {                                                    \
                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
                    b->mul_element[i * 2 + LO_IDX];                     \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element)         \
    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
VMUL(sb, s8, s16)
VMUL(sh, s16, s32)
VMUL(ub, u8, u16)
VMUL(uh, u16, u32)
#undef VMUL_DO
#undef VMUL

void helper_vnmsubfp(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end.  */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_sub(t, bf, &env->vec_status);
            t = float64_chs(t);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}

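/* vperm treats a:b as a 32-byte table; each byte of c selects one of
 * those bytes by its low 5 bits (bit 4 chooses b over a, the low
 * nibble indexes within the chosen register).
 */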
void helper_vperm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
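/* vpkpx packs 32-bit x:8:8:8 pixels into 16-bit 1:5:5:5 pixels,
 * keeping the low bit of the leading byte and the top five bits of
 * each colour channel.
 */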
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
        }
    }
}

#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            HANDLE_NAN1(r->f[i], b->f[i]) {                             \
                r->f[i] = float32_round_to_int(b->f[i], &s);            \
            }                                                           \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element)                                        \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8)
VROTATE(h, u16)
VROTATE(w, u32)
#undef VROTATE

void helper_vrsqrtefp(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            float32 t = float32_sqrt(b->f[i], &env->vec_status);

            r->f[i] = float32_div(float32_one, t, &env->vec_status);
        }
    }
}

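/* Bitwise select: result bits come from b where the corresponding
 * mask bit in c is set, and from a where it is clear.
 */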
void helper_vsel(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            r->f[i] = float32_exp2(b->f[i], &env->vec_status);
        }
    }
}

void helper_vlogefp(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN1(r->f[i], b->f[i]) {
            r->f[i] = float32_log2(b->f[i], &env->vec_status);
        }
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define LEFT 0
#define RIGHT 1
#else
#define LEFT 1
#define RIGHT 0
#endif
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check that they are, to conform
 * to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, LEFT)
VSHIFT(r, RIGHT)
#undef VSHIFT
#undef LEFT
#undef RIGHT

#define VSL(suffix, element)                                            \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8)
VSL(h, u16)
VSL(w, u32)
#undef VSL

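/* vsldoi concatenates a:b and shifts the 32-byte value left by the
 * immediate number of octets, returning the high 16 bytes; e.g. with
 * shift = 3 the result is bytes a[3..15] followed by b[0..2].
 */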
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

#define VSR(suffix, element)                                            \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8)
VSR(ah, s16)
VSR(aw, s32)
VSR(b, u8)
VSR(h, u16)
VSR(w, u32)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

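/* Carry-out of each 32-bit subtraction, computed as a + ~b + 1:
 * the carry is 1 exactly when a >= b (no borrow).
 */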
void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
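/* vupkhpx/vupklpx expand 1:5:5:5 pixels back to 32 bits: the single
 * alpha bit is stretched to 0xff or 0x00, and each 5-bit colour
 * channel is zero-extended into its own byte.
 */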
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i+4];                          \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#undef DO_HANDLE_NAN
#undef HANDLE_NAN1
#undef HANDLE_NAN2
#undef HANDLE_NAN3
#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker.  */
static uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

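/* brinc implements the bit-reversed increment used for FFT-style
 * addressing: the masked low bits are bit-reversed, incremented,
 * and reversed back.
 */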
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
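/* dlmzb: determine the length of a block of data terminated by a zero
 * byte, scanning the high register then the low one; the byte count
 * lands in the low bits of XER and, optionally, flags in CR0.
 */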
target_ulong helper_dlmzb(target_ulong high, target_ulong low,
                          uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}