]> git.proxmox.com Git - mirror_qemu.git/blame - target-ppc/int_helper.c
target-ppc: use the softfloat min/max functions
[mirror_qemu.git] / target-ppc / int_helper.c
CommitLineData
64654ded
BS
1/*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19#include "cpu.h"
64654ded
BS
20#include "host-utils.h"
21#include "helper.h"
22
23#include "helper_regs.h"
24/*****************************************************************************/
25/* Fixed point operations helpers */
26#if defined(TARGET_PPC64)
27
28/* multiply high word */
/* multiply high doubleword (signed): return the upper 64 bits of the
 * 128-bit product arg1 * arg2. */
uint64_t helper_mulhd(uint64_t arg1, uint64_t arg2)
{
    uint64_t lo, hi;

    muls64(&lo, &hi, arg1, arg2);
    return hi;
}
36
37/* multiply high word unsigned */
/* multiply high doubleword (unsigned): return the upper 64 bits of the
 * 128-bit product arg1 * arg2. */
uint64_t helper_mulhdu(uint64_t arg1, uint64_t arg2)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, arg1, arg2);
    return hi;
}
45
d15f74fb 46uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
64654ded
BS
47{
48 int64_t th;
49 uint64_t tl;
50
51 muls64(&tl, (uint64_t *)&th, arg1, arg2);
52 /* If th != 0 && th != -1, then we had an overflow */
53 if (likely((uint64_t)(th + 1) <= 1)) {
54 env->xer &= ~(1 << XER_OV);
55 } else {
56 env->xer |= (1 << XER_OV) | (1 << XER_SO);
57 }
58 return (int64_t)tl;
59}
60#endif
61
62target_ulong helper_cntlzw(target_ulong t)
63{
64 return clz32(t);
65}
66
67#if defined(TARGET_PPC64)
68target_ulong helper_cntlzd(target_ulong t)
69{
70 return clz64(t);
71}
72#endif
73
74/* shift right arithmetic helper */
d15f74fb
BS
75target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
76 target_ulong shift)
64654ded
BS
77{
78 int32_t ret;
79
80 if (likely(!(shift & 0x20))) {
81 if (likely((uint32_t)shift != 0)) {
82 shift &= 0x1f;
83 ret = (int32_t)value >> shift;
84 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
85 env->xer &= ~(1 << XER_CA);
86 } else {
87 env->xer |= (1 << XER_CA);
88 }
89 } else {
90 ret = (int32_t)value;
91 env->xer &= ~(1 << XER_CA);
92 }
93 } else {
94 ret = (int32_t)value >> 31;
95 if (ret) {
96 env->xer |= (1 << XER_CA);
97 } else {
98 env->xer &= ~(1 << XER_CA);
99 }
100 }
101 return (target_long)ret;
102}
103
104#if defined(TARGET_PPC64)
d15f74fb
BS
105target_ulong helper_srad(CPUPPCState *env, target_ulong value,
106 target_ulong shift)
64654ded
BS
107{
108 int64_t ret;
109
110 if (likely(!(shift & 0x40))) {
111 if (likely((uint64_t)shift != 0)) {
112 shift &= 0x3f;
113 ret = (int64_t)value >> shift;
114 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
115 env->xer &= ~(1 << XER_CA);
116 } else {
117 env->xer |= (1 << XER_CA);
118 }
119 } else {
120 ret = (int64_t)value;
121 env->xer &= ~(1 << XER_CA);
122 }
123 } else {
124 ret = (int64_t)value >> 63;
125 if (ret) {
126 env->xer |= (1 << XER_CA);
127 } else {
128 env->xer &= ~(1 << XER_CA);
129 }
130 }
131 return ret;
132}
133#endif
134
135#if defined(TARGET_PPC64)
136target_ulong helper_popcntb(target_ulong val)
137{
138 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
139 0x5555555555555555ULL);
140 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
141 0x3333333333333333ULL);
142 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
143 0x0f0f0f0f0f0f0f0fULL);
144 return val;
145}
146
147target_ulong helper_popcntw(target_ulong val)
148{
149 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
150 0x5555555555555555ULL);
151 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
152 0x3333333333333333ULL);
153 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
154 0x0f0f0f0f0f0f0f0fULL);
155 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
156 0x00ff00ff00ff00ffULL);
157 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
158 0x0000ffff0000ffffULL);
159 return val;
160}
161
162target_ulong helper_popcntd(target_ulong val)
163{
164 return ctpop64(val);
165}
166#else
167target_ulong helper_popcntb(target_ulong val)
168{
169 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
170 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
171 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
172 return val;
173}
174
175target_ulong helper_popcntw(target_ulong val)
176{
177 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
178 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
179 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
180 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
181 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
182 return val;
183}
184#endif
185
186/*****************************************************************************/
187/* PowerPC 601 specific instructions (POWER bridge) */
d15f74fb 188target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
64654ded
BS
189{
190 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
191
192 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
193 (int32_t)arg2 == 0) {
194 env->spr[SPR_MQ] = 0;
195 return INT32_MIN;
196 } else {
197 env->spr[SPR_MQ] = tmp % arg2;
198 return tmp / (int32_t)arg2;
199 }
200}
201
d15f74fb
BS
202target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
203 target_ulong arg2)
64654ded
BS
204{
205 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
206
207 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
208 (int32_t)arg2 == 0) {
209 env->xer |= (1 << XER_OV) | (1 << XER_SO);
210 env->spr[SPR_MQ] = 0;
211 return INT32_MIN;
212 } else {
213 env->spr[SPR_MQ] = tmp % arg2;
214 tmp /= (int32_t)arg2;
215 if ((int32_t)tmp != tmp) {
216 env->xer |= (1 << XER_OV) | (1 << XER_SO);
217 } else {
218 env->xer &= ~(1 << XER_OV);
219 }
220 return tmp;
221 }
222}
223
d15f74fb
BS
224target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
225 target_ulong arg2)
64654ded
BS
226{
227 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
228 (int32_t)arg2 == 0) {
229 env->spr[SPR_MQ] = 0;
230 return INT32_MIN;
231 } else {
232 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
233 return (int32_t)arg1 / (int32_t)arg2;
234 }
235}
236
d15f74fb
BS
237target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
238 target_ulong arg2)
64654ded
BS
239{
240 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
241 (int32_t)arg2 == 0) {
242 env->xer |= (1 << XER_OV) | (1 << XER_SO);
243 env->spr[SPR_MQ] = 0;
244 return INT32_MIN;
245 } else {
246 env->xer &= ~(1 << XER_OV);
247 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
248 return (int32_t)arg1 / (int32_t)arg2;
249 }
250}
251
252/*****************************************************************************/
253/* 602 specific instructions */
254/* mfrom is the most crazy instruction ever seen, imho ! */
255/* Real implementation uses a ROM table. Do the same */
256/* Extremely decomposed:
257 * -arg / 256
258 * return 256 * log10(10 + 1.0) + 0.5
259 */
260#if !defined(CONFIG_USER_ONLY)
/* 602 mfrom: look up the precomputed ROM table (generated at build time
 * and included locally so the array stays private to this helper).
 * Out-of-range indexes (>= 602) return 0. */
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
270#endif
271
272/*****************************************************************************/
273/* Altivec extension helpers */
/* Index of the most/least significant 64-bit half of a ppc_avr_t,
 * depending on host byte order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

/* Iterate over vector elements in PowerPC (big-endian) element order,
 * regardless of host endianness. */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
289
/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
 * execute the following block. */
#define DO_HANDLE_NAN(result, x)                        \
    if (float32_is_any_nan(x)) {                        \
        CPU_FloatU __f;                                 \
        __f.f = x;                                      \
        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
        result = __f.f;                                 \
    } else

/* Variants quietening the first NaN found among 1..3 operands; the
 * trailing "else" chains so the caller's block runs only when no
 * operand was a NaN. */
#define HANDLE_NAN1(result, x)                          \
    DO_HANDLE_NAN(result, x)
#define HANDLE_NAN2(result, x, y)                       \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
#define HANDLE_NAN3(result, x, y, z)                    \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)
306
/* Saturating arithmetic helpers.  Each generated cvt<from><to> clamps x
 * into the destination range and sets *sat when clamping occurred
 * (*sat is left untouched otherwise, so it accumulates). */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
/* Unsigned variant: no lower-bound test needed. */
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
/* Signed-to-unsigned conversions still need the lower-bound test. */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
349
350void helper_lvsl(ppc_avr_t *r, target_ulong sh)
351{
352 int i, j = (sh & 0xf);
353
354 VECTOR_FOR_INORDER_I(i, u8) {
355 r->u8[i] = j++;
356 }
357}
358
359void helper_lvsr(ppc_avr_t *r, target_ulong sh)
360{
361 int i, j = 0x10 - (sh & 0xf);
362
363 VECTOR_FOR_INORDER_I(i, u8) {
364 r->u8[i] = j++;
365 }
366}
367
/* Move To VSCR.  The VSCR value lives in the last word of the source
 * vector in PowerPC element order (word 3 on big-endian hosts, word 0
 * on little-endian hosts). */
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    /* Keep softfloat's flush-to-zero mode in sync with VSCR[NJ]. */
    set_flush_to_zero(vscr_nj, &env->vec_status);
}
377
378void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
379{
380 int i;
381
382 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
383 r->u32[i] = ~a->u32[i] < b->u32[i];
384 }
385}
386
/* Modulo (non-saturating) element-wise vector add/subtract. */
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
#undef VARITH_DO
#undef VARITH
404
/* Element-wise single-precision float operations using the vector
 * softfloat status (NaN handling is delegated to softfloat). */
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
420
/* Saturating vector add/subtract: compute in a wider type, then clamp
 * through the matching cvt* helper, which records saturation. */
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        /* Any clamped element sets the sticky VSCR[SAT] bit. */        \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
467
/* Vector average: (a + b + 1) >> 1, computed in a wider type so the
 * intermediate sum cannot overflow. */
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
488
/* vcfux/vcfsx: convert integer elements to float, then divide by
 * 2**uim via scalbn. */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF
503
/* Vector integer compare: write an all-ones mask per element on
 * success, zero on failure.  The "_dot" (record) variants also set
 * CR6: bit 3 = all elements true, bit 1 = all elements false. */
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint32_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            /* Store through the element-width alias of the result. */  \
            switch (sizeof(a->element[0])) {                            \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
#undef VCMP_DO
#undef VCMP
548
/* Vector float compare: per-word all-ones mask on success.  Unordered
 * comparisons (a NaN operand) always produce false.  The "_dot"
 * variants record all-true/all-false into CR6. */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
/* "greater or equal" == "not less" once unordered is excluded. */
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP
585
d15f74fb
BS
/* vcmpbfp core: bounds compare.  For each element set bit 31 when
 * a > b ("out of bounds high") and bit 30 when a < -b ("out of bounds
 * low"); a NaN operand sets both.  When recording, CR6 bit 1 is set
 * iff every non-NaN element was within bounds. */
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here. */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}
611
d15f74fb 612void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded 613{
d15f74fb 614 vcmpbfp_internal(env, r, a, b, 0);
64654ded
BS
615}
616
d15f74fb
BS
617void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
618 ppc_avr_t *b)
64654ded 619{
d15f74fb 620 vcmpbfp_internal(env, r, a, b, 1);
64654ded
BS
621}
622
/* vctuxs/vctsxs: convert floats to (un)signed 32-bit integers after
 * scaling by 2**uim, saturating and rounding toward zero.  The
 * conversion goes through float64 so the scaled value keeps full
 * precision before the final saturating narrow. */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                /* NaN inputs convert to zero without saturating. */    \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
651
d15f74fb
BS
/* vmaddfp: fused-style multiply-add r = a * c + b per element.  NaN
 * operands are quietened by HANDLE_NAN3 instead of entering the
 * computation. */
void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                    ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end. */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_add(t, bf, &env->vec_status);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}
672
d15f74fb
BS
673void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
674 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
675{
676 int sat = 0;
677 int i;
678
679 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
680 int32_t prod = a->s16[i] * b->s16[i];
681 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
682
683 r->s16[i] = cvtswsh(t, &sat);
684 }
685
686 if (sat) {
687 env->vscr |= (1 << VSCR_SAT);
688 }
689}
690
d15f74fb
BS
691void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
692 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
693{
694 int sat = 0;
695 int i;
696
697 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
698 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
699 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
700 r->s16[i] = cvtswsh(t, &sat);
701 }
702
703 if (sat) {
704 env->vscr |= (1 << VSCR_SAT);
705 }
706}
707
/* Element-wise integer min/max.  min uses ">" (keep b when a > b),
 * max uses "<" (keep b when a < b). */
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
#undef VMINMAX_DO
#undef VMINMAX
732
64654ded
BS
733void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
734{
735 int i;
736
737 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
738 int32_t prod = a->s16[i] * b->s16[i];
739 r->s16[i] = (int16_t) (prod + c->s16[i]);
740 }
741}
742
/* Vector merge high/low.  The result is built in a temporary so that
 * r may alias a or b.  The non-high path fills elements from the end
 * so that host element order is handled by the MRGHI/MRGLO swap. */
#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
/* On little-endian hosts the meaning of "high" is inverted. */
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO
780
d15f74fb
BS
781void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
782 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
783{
784 int32_t prod[16];
785 int i;
786
787 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
788 prod[i] = (int32_t)a->s8[i] * b->u8[i];
789 }
790
791 VECTOR_FOR_INORDER_I(i, s32) {
792 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
793 prod[4 * i + 2] + prod[4 * i + 3];
794 }
795}
796
d15f74fb
BS
797void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
798 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
799{
800 int32_t prod[8];
801 int i;
802
803 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
804 prod[i] = a->s16[i] * b->s16[i];
805 }
806
807 VECTOR_FOR_INORDER_I(i, s32) {
808 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
809 }
810}
811
d15f74fb
BS
812void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
813 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
814{
815 int32_t prod[8];
816 int i;
817 int sat = 0;
818
819 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
820 prod[i] = (int32_t)a->s16[i] * b->s16[i];
821 }
822
823 VECTOR_FOR_INORDER_I(i, s32) {
824 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
825
826 r->u32[i] = cvtsdsw(t, &sat);
827 }
828
829 if (sat) {
830 env->vscr |= (1 << VSCR_SAT);
831 }
832}
833
d15f74fb
BS
834void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
835 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
836{
837 uint16_t prod[16];
838 int i;
839
840 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
841 prod[i] = a->u8[i] * b->u8[i];
842 }
843
844 VECTOR_FOR_INORDER_I(i, u32) {
845 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
846 prod[4 * i + 2] + prod[4 * i + 3];
847 }
848}
849
d15f74fb
BS
850void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
851 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
852{
853 uint32_t prod[8];
854 int i;
855
856 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
857 prod[i] = a->u16[i] * b->u16[i];
858 }
859
860 VECTOR_FOR_INORDER_I(i, u32) {
861 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
862 }
863}
864
d15f74fb
BS
865void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
866 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
867{
868 uint32_t prod[8];
869 int i;
870 int sat = 0;
871
872 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
873 prod[i] = a->u16[i] * b->u16[i];
874 }
875
876 VECTOR_FOR_INORDER_I(i, s32) {
877 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
878
879 r->u32[i] = cvtuduw(t, &sat);
880 }
881
882 if (sat) {
883 env->vscr |= (1 << VSCR_SAT);
884 }
885}
886
/* Vector multiply even/odd: multiply the even- or odd-numbered
 * (PowerPC order) elements into double-width products. */
#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
                    b->mul_element[i * 2 + HI_IDX];                     \
            } else {                                                    \
                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
                    b->mul_element[i * 2 + LO_IDX];                     \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element)         \
    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
VMUL(sb, s8, s16)
VMUL(sh, s16, s32)
VMUL(ub, u8, u16)
VMUL(uh, u16, u32)
#undef VMUL_DO
#undef VMUL
911
d15f74fb
BS
/* vnmsubfp: negative multiply-subtract, r = -(a * c - b) per element.
 * NaN operands are quietened by HANDLE_NAN3 instead of entering the
 * computation. */
void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end. */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_sub(t, bf, &env->vec_status);
            t = float64_chs(t);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}
933
d15f74fb
BS
934void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
935 ppc_avr_t *c)
64654ded
BS
936{
937 ppc_avr_t result;
938 int i;
939
940 VECTOR_FOR_INORDER_I(i, u8) {
941 int s = c->u8[i] & 0x1f;
942#if defined(HOST_WORDS_BIGENDIAN)
943 int index = s & 0xf;
944#else
945 int index = 15 - (s & 0xf);
946#endif
947
948 if (s & 0x10) {
949 result.u8[i] = b->u8[index];
950 } else {
951 result.u8[i] = a->u8[index];
952 }
953 }
954 *r = result;
955}
956
/* Order of the two source vectors when packing, by host endianness. */
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
/* vpkpx: pack 32-bit pixels to 16-bit 1:5:5:5 pixels, taking the top
 * bit of each 8-bit channel group. */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}
983
/* Vector pack: narrow two source vectors into one, optionally
 * saturating via a cvt* helper (the I macro is the identity, used for
 * the modulo variants).  A temporary allows r to alias a or b. */
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
#undef I
#undef VPK
#undef PKBIG
1015
d15f74fb 1016void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1017{
1018 int i;
1019
1020 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1021 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
64654ded
BS
1022 }
1023}
1024
/* vrfin/vrfim/vrfip/vrfiz: round each element to an integral float
 * using a local float_status copy with the requested rounding mode. */
#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *b)                              \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_round_to_int (b->f[i], &s);               \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI
1042
1043#define VROTATE(suffix, element) \
1044 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1045 { \
1046 int i; \
1047 \
1048 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1049 unsigned int mask = ((1 << \
1050 (3 + (sizeof(a->element[0]) >> 1))) \
1051 - 1); \
1052 unsigned int shift = b->element[i] & mask; \
1053 r->element[i] = (a->element[i] << shift) | \
1054 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1055 } \
1056 }
1057VROTATE(b, u8)
1058VROTATE(h, u16)
1059VROTATE(w, u32)
1060#undef VROTATE
1061
d15f74fb 1062void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1063{
1064 int i;
1065
1066 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1067 float32 t = float32_sqrt(b->f[i], &env->vec_status);
64654ded 1068
ef9bd150 1069 r->f[i] = float32_div(float32_one, t, &env->vec_status);
64654ded
BS
1070 }
1071}
1072
d15f74fb
BS
1073void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1074 ppc_avr_t *c)
64654ded
BS
1075{
1076 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1077 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1078}
1079
d15f74fb 1080void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1081{
1082 int i;
1083
1084 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1085 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
64654ded
BS
1086 }
1087}
1088
d15f74fb 1089void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1090{
1091 int i;
1092
1093 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1094 r->f[i] = float32_log2(b->f[i], &env->vec_status);
64654ded
BS
1095 }
1096}
1097
#if defined(HOST_WORDS_BIGENDIAN)
#define LEFT 0
#define RIGHT 1
#else
#define LEFT 1
#define RIGHT 0
#endif
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do. */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            /* shift is 0..7, so 64 - shift below is always valid. */   \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, LEFT)
VSHIFT(r, RIGHT)
#undef VSHIFT
#undef LEFT
#undef RIGHT
1139
/* Element-wise shift left; the shift count is masked to the element
 * width, so no out-of-range shift can occur. */
#define VSL(suffix, element)                                            \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8)
VSL(h, u16)
VSL(w, u32)
#undef VSL
1158
/* vsldoi: treat a:b as a 32-byte value and extract the 16 bytes
 * starting 'shift' bytes in (architectural, big-endian byte order).
 * A temporary is used so r may alias a or b.  The little-endian arm
 * mirrors the indexing because ppc_avr_t bytes are stored reversed
 * on LE hosts. */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            /* ran past the end of a: continue into b */
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}
1186
/* vslo: shift the 128-bit value of a left by whole octets; the 4-bit
 * byte count comes from bits 6:3 of the low-order byte of b. */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    /* memmove, not memcpy: r may alias a */
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}
1199
/* Experimental testing shows that hardware masks the immediate. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
/* LE hosts store vector elements reversed, so flip the index to reach
 * the architecturally numbered element. */
#define SPLAT_ELEMENT(element) \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
/* vspltb/vsplth/vspltw: replicate element number 'splat' of b into
 * every element of r. */
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
1224
/* vspltisb/vspltish/vspltisw: splat a sign-extended 5-bit immediate.
 * '(int8_t)(splat << 3) >> 3' shifts the immediate's sign bit (bit 4)
 * up to bit 7, then arithmetic-shifts it back down, sign-extending
 * through the 8-bit value before it widens to splat_type.  (Relies on
 * signed >> being arithmetic, as QEMU assumes throughout.) */
#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI
1239
1240#define VSR(suffix, element) \
1241 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1242 { \
1243 int i; \
1244 \
1245 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1246 unsigned int mask = ((1 << \
1247 (3 + (sizeof(a->element[0]) >> 1))) \
1248 - 1); \
1249 unsigned int shift = b->element[i] & mask; \
1250 \
1251 r->element[i] = a->element[i] >> shift; \
1252 } \
1253 }
1254VSR(ab, s8)
1255VSR(ah, s16)
1256VSR(aw, s32)
1257VSR(b, u8)
1258VSR(h, u16)
1259VSR(w, u32)
1260#undef VSR
1261
/* vsro: shift the 128-bit value of a right by whole octets; the 4-bit
 * byte count comes from bits 6:3 of the low-order byte of b.
 * memmove, not memcpy, because r may alias a. */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}
1274
1275void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1276{
1277 int i;
1278
1279 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1280 r->u32[i] = a->u32[i] >= b->u32[i];
1281 }
1282}
1283
d15f74fb 1284void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1285{
1286 int64_t t;
1287 int i, upper;
1288 ppc_avr_t result;
1289 int sat = 0;
1290
1291#if defined(HOST_WORDS_BIGENDIAN)
1292 upper = ARRAY_SIZE(r->s32)-1;
1293#else
1294 upper = 0;
1295#endif
1296 t = (int64_t)b->s32[upper];
1297 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1298 t += a->s32[i];
1299 result.s32[i] = 0;
1300 }
1301 result.s32[upper] = cvtsdsw(t, &sat);
1302 *r = result;
1303
1304 if (sat) {
1305 env->vscr |= (1 << VSCR_SAT);
1306 }
1307}
1308
/* vsum2sws: for each 64-bit half, sum its two signed words of a plus
 * word 'upper' of the same half of b, saturate to a signed word, and
 * deposit the result in that word (the half's other word is zeroed).
 * A temporary is used so r may alias a or b. */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        /* ARRAY_SIZE(r->u64) == 2, so this adds both words of
         * doubleword i of a. */
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1335
d15f74fb 1336void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1337{
1338 int i, j;
1339 int sat = 0;
1340
1341 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1342 int64_t t = (int64_t)b->s32[i];
1343
1344 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1345 t += a->s8[4 * i + j];
1346 }
1347 r->s32[i] = cvtsdsw(t, &sat);
1348 }
1349
1350 if (sat) {
1351 env->vscr |= (1 << VSCR_SAT);
1352 }
1353}
1354
d15f74fb 1355void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1356{
1357 int sat = 0;
1358 int i;
1359
1360 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1361 int64_t t = (int64_t)b->s32[i];
1362
1363 t += a->s16[2 * i] + a->s16[2 * i + 1];
1364 r->s32[i] = cvtsdsw(t, &sat);
1365 }
1366
1367 if (sat) {
1368 env->vscr |= (1 << VSCR_SAT);
1369 }
1370}
1371
d15f74fb 1372void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1373{
1374 int i, j;
1375 int sat = 0;
1376
1377 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1378 uint64_t t = (uint64_t)b->u32[i];
1379
1380 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1381 t += a->u8[4 * i + j];
1382 }
1383 r->u32[i] = cvtuduw(t, &sat);
1384 }
1385
1386 if (sat) {
1387 env->vscr |= (1 << VSCR_SAT);
1388 }
1389}
1390
1391#if defined(HOST_WORDS_BIGENDIAN)
1392#define UPKHI 1
1393#define UPKLO 0
1394#else
1395#define UPKHI 0
1396#define UPKLO 1
1397#endif
1398#define VUPKPX(suffix, hi) \
1399 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1400 { \
1401 int i; \
1402 ppc_avr_t result; \
1403 \
1404 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1405 uint16_t e = b->u16[hi ? i : i+4]; \
1406 uint8_t a = (e >> 15) ? 0xff : 0; \
1407 uint8_t r = (e >> 10) & 0x1f; \
1408 uint8_t g = (e >> 5) & 0x1f; \
1409 uint8_t b = e & 0x1f; \
1410 \
1411 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1412 } \
1413 *r = result; \
1414 }
1415VUPKPX(lpx, UPKLO)
1416VUPKPX(hpx, UPKHI)
1417#undef VUPKPX
1418
/* vupk{h,l}s{b,h}: sign-extend one half of the packed elements of b
 * into the wider elements of r (the widening assignment performs the
 * sign extension).  UPKHI/UPKLO are endian-adjusted, so 'hi' selects
 * the half that holds the architectural high/low elements in host
 * element order.  A temporary is used so r may alias b. */
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            /* walk the upper half of the packed elements */            \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO
1444
1445#undef DO_HANDLE_NAN
1446#undef HANDLE_NAN1
1447#undef HANDLE_NAN2
1448#undef HANDLE_NAN3
1449#undef VECTOR_FOR_INORDER_I
1450#undef HI_IDX
1451#undef LO_IDX
1452
1453/*****************************************************************************/
1454/* SPE extension helpers */
1455/* Use a table to make this quicker */
/* Use a table to make this quicker: hbrev[n] is the 4-bit reversal of
 * the nibble n. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order within one byte: reverse each nibble and swap
 * the two nibbles. */
static inline uint8_t byte_reverse(uint8_t val)
{
    uint8_t lo_rev = hbrev[val & 0xF];
    uint8_t hi_rev = hbrev[val >> 4];

    return (uint8_t)((lo_rev << 4) | hi_rev);
}

/* Reverse the bit order of a 32-bit word: bit-reverse each byte and
 * swap the bytes end for end. */
static inline uint32_t word_reverse(uint32_t val)
{
    uint32_t b0 = byte_reverse(val >> 24);
    uint32_t b1 = byte_reverse(val >> 16);
    uint32_t b2 = byte_reverse(val >> 8);
    uint32_t b3 = byte_reverse(val);

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
1471
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
/* brinc (SPE bit-reversed increment): within the low MASKBITS bits,
 * increment the bit-reversed image of arg1 under the mask arg2, then
 * reverse back -- the address-generation pattern used by FFT kernels.
 * Bits of arg1 above the mask pass through unchanged.
 * NOTE(review): the architecture leaves the mask width implementation
 * dependent; MASKBITS == 16 is this implementation's choice (see the
 * comment above). */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    /* reversed-domain increment: reverse, add 1 with ~b forcing carry
     * propagation past bits outside the mask, reverse back */
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
1483
/* Count the leading sign bits of a 32-bit value: for a negative value
 * the run of leading ones is turned into a run of leading zeros by
 * complementing, so a single clz32 covers both cases. */
uint32_t helper_cntlsw32(uint32_t val)
{
    return clz32((val & 0x80000000) ? ~val : val);
}
1492
/* Count the leading zero bits of a 32-bit value (thin wrapper around
 * the host-utils clz32). */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
1497
/* 440 specific */
/* dlmzb: scan the 8 bytes of high:low, most-significant byte first,
 * for a zero byte.  Returns the 1-based position of the first zero
 * byte and writes it into the low 7 bits of XER; when no zero byte is
 * present i ends up as 9 (all 8 increments taken) -- NOTE(review):
 * confirm this value against the 440 instruction description.
 * If update_Rc, CR0 encodes where the scan stopped:
 *   0x4 -> zero byte in 'high', 0x8 -> zero byte in 'low',
 *   0x2 -> no zero byte; XER[SO] is ORed in afterwards. */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    /* byte count lives in XER[57:63] */
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}