]> git.proxmox.com Git - mirror_qemu.git/blame - target/ppc/int_helper.c
target-alpha: Use ctpop helper
[mirror_qemu.git] / target / ppc / int_helper.c
CommitLineData
64654ded
BS
1/*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
0d75590d 19#include "qemu/osdep.h"
64654ded 20#include "cpu.h"
3e00884f 21#include "internal.h"
63c91552 22#include "exec/exec-all.h"
1de7afc9 23#include "qemu/host-utils.h"
2ef6175a 24#include "exec/helper-proto.h"
6f2945cd 25#include "crypto/aes.h"
64654ded
BS
26
27#include "helper_regs.h"
28/*****************************************************************************/
29/* Fixed point operations helpers */
64654ded 30
6a4fda33
TM
31target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
32 uint32_t oe)
33{
34 uint64_t rt = 0;
35 int overflow = 0;
36
37 uint64_t dividend = (uint64_t)ra << 32;
38 uint64_t divisor = (uint32_t)rb;
39
40 if (unlikely(divisor == 0)) {
41 overflow = 1;
42 } else {
43 rt = dividend / divisor;
44 overflow = rt > UINT32_MAX;
45 }
46
47 if (unlikely(overflow)) {
48 rt = 0; /* Undefined */
49 }
50
51 if (oe) {
52 if (unlikely(overflow)) {
53 env->so = env->ov = 1;
54 } else {
55 env->ov = 0;
56 }
57 }
58
59 return (target_ulong)rt;
60}
61
a98eb9e9
TM
62target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
63 uint32_t oe)
64{
65 int64_t rt = 0;
66 int overflow = 0;
67
68 int64_t dividend = (int64_t)ra << 32;
69 int64_t divisor = (int64_t)((int32_t)rb);
70
71 if (unlikely((divisor == 0) ||
72 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
73 overflow = 1;
74 } else {
75 rt = dividend / divisor;
76 overflow = rt != (int32_t)rt;
77 }
78
79 if (unlikely(overflow)) {
80 rt = 0; /* Undefined */
81 }
82
83 if (oe) {
84 if (unlikely(overflow)) {
85 env->so = env->ov = 1;
86 } else {
87 env->ov = 0;
88 }
89 }
90
91 return (target_ulong)rt;
92}
93
98d1eb27
TM
94#if defined(TARGET_PPC64)
95
96uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97{
98 uint64_t rt = 0;
99 int overflow = 0;
100
101 overflow = divu128(&rt, &ra, rb);
102
103 if (unlikely(overflow)) {
104 rt = 0; /* Undefined */
105 }
106
107 if (oe) {
108 if (unlikely(overflow)) {
109 env->so = env->ov = 1;
110 } else {
111 env->ov = 0;
112 }
113 }
114
115 return rt;
116}
117
e44259b6
TM
118uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119{
120 int64_t rt = 0;
121 int64_t ra = (int64_t)rau;
122 int64_t rb = (int64_t)rbu;
123 int overflow = divs128(&rt, &ra, rb);
124
125 if (unlikely(overflow)) {
126 rt = 0; /* Undefined */
127 }
128
129 if (oe) {
130
131 if (unlikely(overflow)) {
132 env->so = env->ov = 1;
133 } else {
134 env->ov = 0;
135 }
136 }
137
138 return rt;
139}
140
98d1eb27
TM
141#endif
142
143
64654ded 144#if defined(TARGET_PPC64)
082ce330
ND
145/* if x = 0xab, returns 0xababababababababa */
146#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
147
148/* substract 1 from each byte, and with inverse, check if MSB is set at each
149 * byte.
150 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
151 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
152 */
153#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
154
155/* When you XOR the pattern and there is a match, that byte will be zero */
156#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
157
158uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
159{
160 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
161}
162
163#undef pattern
164#undef haszero
165#undef hasvalue
166
fec5c62a
RB
167/* Return invalid random number.
168 *
169 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
170 * random number
171 */
172target_ulong helper_darn32(void)
173{
174 return -1;
175}
176
177target_ulong helper_darn64(void)
178{
179 return -1;
180}
181
64654ded
BS
182#endif
183
86ba37ed
TM
184#if defined(TARGET_PPC64)
185
/* bpermd: each byte of rs selects one bit of rb (big-endian bit numbering);
 * the selected bits are gathered into the low byte of the result.  A byte
 * value >= 64 contributes a zero bit. */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t ra = 0;
    int i;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;

        if (index < 64 && (rb & (1ull << (63 - index)))) {
            ra |= 1 << i;
        }
    }
    return ra;
}
201
202#endif
203
fcfda20f
AJ
204target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
205{
206 target_ulong mask = 0xff;
207 target_ulong ra = 0;
208 int i;
209
210 for (i = 0; i < sizeof(target_ulong); i++) {
211 if ((rs & mask) == (rb & mask)) {
212 ra |= mask;
213 }
214 mask <<= 8;
215 }
216 return ra;
217}
218
64654ded 219/* shift right arithmetic helper */
d15f74fb
BS
220target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
221 target_ulong shift)
64654ded
BS
222{
223 int32_t ret;
224
225 if (likely(!(shift & 0x20))) {
226 if (likely((uint32_t)shift != 0)) {
227 shift &= 0x1f;
228 ret = (int32_t)value >> shift;
229 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
da91a00f 230 env->ca = 0;
64654ded 231 } else {
da91a00f 232 env->ca = 1;
64654ded
BS
233 }
234 } else {
235 ret = (int32_t)value;
da91a00f 236 env->ca = 0;
64654ded
BS
237 }
238 } else {
239 ret = (int32_t)value >> 31;
da91a00f 240 env->ca = (ret != 0);
64654ded
BS
241 }
242 return (target_long)ret;
243}
244
245#if defined(TARGET_PPC64)
d15f74fb
BS
246target_ulong helper_srad(CPUPPCState *env, target_ulong value,
247 target_ulong shift)
64654ded
BS
248{
249 int64_t ret;
250
251 if (likely(!(shift & 0x40))) {
252 if (likely((uint64_t)shift != 0)) {
253 shift &= 0x3f;
254 ret = (int64_t)value >> shift;
4bc02e23 255 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
da91a00f 256 env->ca = 0;
64654ded 257 } else {
da91a00f 258 env->ca = 1;
64654ded
BS
259 }
260 } else {
261 ret = (int64_t)value;
da91a00f 262 env->ca = 0;
64654ded
BS
263 }
264 } else {
265 ret = (int64_t)value >> 63;
da91a00f 266 env->ca = (ret != 0);
64654ded
BS
267 }
268 return ret;
269}
270#endif
271
272#if defined(TARGET_PPC64)
273target_ulong helper_popcntb(target_ulong val)
274{
275 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
276 0x5555555555555555ULL);
277 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
278 0x3333333333333333ULL);
279 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
280 0x0f0f0f0f0f0f0f0fULL);
281 return val;
282}
283
284target_ulong helper_popcntw(target_ulong val)
285{
286 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
287 0x5555555555555555ULL);
288 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
289 0x3333333333333333ULL);
290 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
291 0x0f0f0f0f0f0f0f0fULL);
292 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
293 0x00ff00ff00ff00ffULL);
294 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
295 0x0000ffff0000ffffULL);
296 return val;
297}
298
299target_ulong helper_popcntd(target_ulong val)
300{
301 return ctpop64(val);
302}
303#else
304target_ulong helper_popcntb(target_ulong val)
305{
306 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
307 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
308 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
309 return val;
310}
311
312target_ulong helper_popcntw(target_ulong val)
313{
314 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
315 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
316 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
317 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
318 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
319 return val;
320}
321#endif
322
323/*****************************************************************************/
324/* PowerPC 601 specific instructions (POWER bridge) */
d15f74fb 325target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
64654ded
BS
326{
327 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328
329 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
330 (int32_t)arg2 == 0) {
331 env->spr[SPR_MQ] = 0;
332 return INT32_MIN;
333 } else {
334 env->spr[SPR_MQ] = tmp % arg2;
335 return tmp / (int32_t)arg2;
336 }
337}
338
d15f74fb
BS
339target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
340 target_ulong arg2)
64654ded
BS
341{
342 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
343
344 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
345 (int32_t)arg2 == 0) {
da91a00f 346 env->so = env->ov = 1;
64654ded
BS
347 env->spr[SPR_MQ] = 0;
348 return INT32_MIN;
349 } else {
350 env->spr[SPR_MQ] = tmp % arg2;
351 tmp /= (int32_t)arg2;
352 if ((int32_t)tmp != tmp) {
da91a00f 353 env->so = env->ov = 1;
64654ded 354 } else {
da91a00f 355 env->ov = 0;
64654ded
BS
356 }
357 return tmp;
358 }
359}
360
d15f74fb
BS
361target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
362 target_ulong arg2)
64654ded
BS
363{
364 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
365 (int32_t)arg2 == 0) {
366 env->spr[SPR_MQ] = 0;
367 return INT32_MIN;
368 } else {
369 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
370 return (int32_t)arg1 / (int32_t)arg2;
371 }
372}
373
d15f74fb
BS
374target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
375 target_ulong arg2)
64654ded
BS
376{
377 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
378 (int32_t)arg2 == 0) {
da91a00f 379 env->so = env->ov = 1;
64654ded
BS
380 env->spr[SPR_MQ] = 0;
381 return INT32_MIN;
382 } else {
da91a00f 383 env->ov = 0;
64654ded
BS
384 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
385 return (int32_t)arg1 / (int32_t)arg2;
386 }
387}
388
389/*****************************************************************************/
390/* 602 specific instructions */
391/* mfrom is the most crazy instruction ever seen, imho ! */
392/* Real implementation uses a ROM table. Do the same */
393/* Extremely decomposed:
394 * -arg / 256
395 * return 256 * log10(10 + 1.0) + 0.5
396 */
397#if !defined(CONFIG_USER_ONLY)
398target_ulong helper_602_mfrom(target_ulong arg)
399{
400 if (likely(arg < 602)) {
401#include "mfrom_table.c"
402 return mfrom_ROM_table[arg];
403 } else {
404 return 0;
405 }
406}
407#endif
408
409/*****************************************************************************/
410/* Altivec extension helpers */
/* Host-endianness-aware accessors for Altivec registers: HI_IDX/LO_IDX pick
 * the high/low u64 half, AVRB/AVRW index bytes/words in big-endian element
 * order regardless of host byte order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

/* Iterate over the elements of r->element in big-endian element order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif
430
64654ded
BS
/* Saturating arithmetic helpers: cvt<from><to>() clamps x into the target
 * range and sets *sat when clamping occurred. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
473
474void helper_lvsl(ppc_avr_t *r, target_ulong sh)
475{
476 int i, j = (sh & 0xf);
477
478 VECTOR_FOR_INORDER_I(i, u8) {
479 r->u8[i] = j++;
480 }
481}
482
483void helper_lvsr(ppc_avr_t *r, target_ulong sh)
484{
485 int i, j = 0x10 - (sh & 0xf);
486
487 VECTOR_FOR_INORDER_I(i, u8) {
488 r->u8[i] = j++;
489 }
490}
491
d15f74fb 492void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
64654ded
BS
493{
494#if defined(HOST_WORDS_BIGENDIAN)
495 env->vscr = r->u32[3];
496#else
497 env->vscr = r->u32[0];
498#endif
499 set_flush_to_zero(vscr_nj, &env->vec_status);
500}
501
502void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
503{
504 int i;
505
506 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
507 r->u32[i] = ~a->u32[i] < b->u32[i];
508 }
509}
510
5c69452c
AK
511/* vprtybw */
512void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
513{
514 int i;
515 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
516 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
517 res ^= res >> 8;
518 r->u32[i] = res & 1;
519 }
520}
521
522/* vprtybd */
523void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
524{
525 int i;
526 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
527 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
528 res ^= res >> 16;
529 res ^= res >> 8;
530 r->u64[i] = res & 1;
531 }
532}
533
534/* vprtybq */
535void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
536{
537 uint64_t res = b->u64[0] ^ b->u64[1];
538 res ^= res >> 32;
539 res ^= res >> 16;
540 res ^= res >> 8;
541 r->u64[LO_IDX] = res & 1;
542 r->u64[HI_IDX] = 0;
543}
544
64654ded
BS
545#define VARITH_DO(name, op, element) \
546 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
547 { \
548 int i; \
549 \
550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
551 r->element[i] = a->element[i] op b->element[i]; \
552 } \
553 }
554#define VARITH(suffix, element) \
555 VARITH_DO(add##suffix, +, element) \
556 VARITH_DO(sub##suffix, -, element)
557VARITH(ubm, u8)
558VARITH(uhm, u16)
559VARITH(uwm, u32)
56eabc75 560VARITH(udm, u64)
953f0f58 561VARITH_DO(muluwm, *, u32)
64654ded
BS
562#undef VARITH_DO
563#undef VARITH
564
565#define VARITHFP(suffix, func) \
d15f74fb
BS
566 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
567 ppc_avr_t *b) \
64654ded
BS
568 { \
569 int i; \
570 \
571 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
ef9bd150 572 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
64654ded
BS
573 } \
574 }
575VARITHFP(addfp, float32_add)
576VARITHFP(subfp, float32_sub)
db1babb8
AJ
577VARITHFP(minfp, float32_min)
578VARITHFP(maxfp, float32_max)
64654ded
BS
579#undef VARITHFP
580
2f93c23f
AJ
581#define VARITHFPFMA(suffix, type) \
582 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
583 ppc_avr_t *b, ppc_avr_t *c) \
584 { \
585 int i; \
586 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
587 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
588 type, &env->vec_status); \
589 } \
590 }
591VARITHFPFMA(maddfp, 0);
592VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
593#undef VARITHFPFMA
594
64654ded
BS
595#define VARITHSAT_CASE(type, op, cvt, element) \
596 { \
597 type result = (type)a->element[i] op (type)b->element[i]; \
598 r->element[i] = cvt(result, &sat); \
599 }
600
601#define VARITHSAT_DO(name, op, optype, cvt, element) \
d15f74fb
BS
602 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
603 ppc_avr_t *b) \
64654ded
BS
604 { \
605 int sat = 0; \
606 int i; \
607 \
608 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
609 switch (sizeof(r->element[0])) { \
610 case 1: \
611 VARITHSAT_CASE(optype, op, cvt, element); \
612 break; \
613 case 2: \
614 VARITHSAT_CASE(optype, op, cvt, element); \
615 break; \
616 case 4: \
617 VARITHSAT_CASE(optype, op, cvt, element); \
618 break; \
619 } \
620 } \
621 if (sat) { \
622 env->vscr |= (1 << VSCR_SAT); \
623 } \
624 }
625#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
626 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
627 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
628#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
629 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
630 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
631VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
632VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
633VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
634VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
635VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
636VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
637#undef VARITHSAT_CASE
638#undef VARITHSAT_DO
639#undef VARITHSAT_SIGNED
640#undef VARITHSAT_UNSIGNED
641
642#define VAVG_DO(name, element, etype) \
643 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
644 { \
645 int i; \
646 \
647 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
648 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
649 r->element[i] = x >> 1; \
650 } \
651 }
652
653#define VAVG(type, signed_element, signed_type, unsigned_element, \
654 unsigned_type) \
655 VAVG_DO(avgs##type, signed_element, signed_type) \
656 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
657VAVG(b, s8, int16_t, u8, uint16_t)
658VAVG(h, s16, int32_t, u16, uint32_t)
659VAVG(w, s32, int64_t, u32, uint64_t)
660#undef VAVG_DO
661#undef VAVG
662
37707059
SD
663#define VABSDU_DO(name, element) \
664void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
665{ \
666 int i; \
667 \
668 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
669 r->element[i] = (a->element[i] > b->element[i]) ? \
670 (a->element[i] - b->element[i]) : \
671 (b->element[i] - a->element[i]); \
672 } \
673}
674
675/* VABSDU - Vector absolute difference unsigned
676 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
677 * element - element type to access from vector
678 */
679#define VABSDU(type, element) \
680 VABSDU_DO(absdu##type, element)
681VABSDU(b, u8)
682VABSDU(h, u16)
683VABSDU(w, u32)
684#undef VABSDU_DO
685#undef VABSDU
686
64654ded 687#define VCF(suffix, cvt, element) \
d15f74fb
BS
688 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
689 ppc_avr_t *b, uint32_t uim) \
64654ded
BS
690 { \
691 int i; \
692 \
693 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
694 float32 t = cvt(b->element[i], &env->vec_status); \
695 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
696 } \
697 }
698VCF(ux, uint32_to_float32, u32)
699VCF(sx, int32_to_float32, s32)
700#undef VCF
701
702#define VCMP_DO(suffix, compare, element, record) \
d15f74fb
BS
703 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
704 ppc_avr_t *a, ppc_avr_t *b) \
64654ded 705 { \
6f3dab41
TM
706 uint64_t ones = (uint64_t)-1; \
707 uint64_t all = ones; \
708 uint64_t none = 0; \
64654ded
BS
709 int i; \
710 \
711 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
6f3dab41 712 uint64_t result = (a->element[i] compare b->element[i] ? \
64654ded
BS
713 ones : 0x0); \
714 switch (sizeof(a->element[0])) { \
6f3dab41
TM
715 case 8: \
716 r->u64[i] = result; \
717 break; \
64654ded
BS
718 case 4: \
719 r->u32[i] = result; \
720 break; \
721 case 2: \
722 r->u16[i] = result; \
723 break; \
724 case 1: \
725 r->u8[i] = result; \
726 break; \
727 } \
728 all &= result; \
729 none |= result; \
730 } \
731 if (record) { \
732 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
733 } \
734 }
735#define VCMP(suffix, compare, element) \
736 VCMP_DO(suffix, compare, element, 0) \
737 VCMP_DO(suffix##_dot, compare, element, 1)
738VCMP(equb, ==, u8)
739VCMP(equh, ==, u16)
740VCMP(equw, ==, u32)
6f3dab41 741VCMP(equd, ==, u64)
64654ded
BS
742VCMP(gtub, >, u8)
743VCMP(gtuh, >, u16)
744VCMP(gtuw, >, u32)
6f3dab41 745VCMP(gtud, >, u64)
64654ded
BS
746VCMP(gtsb, >, s8)
747VCMP(gtsh, >, s16)
748VCMP(gtsw, >, s32)
6f3dab41 749VCMP(gtsd, >, s64)
64654ded
BS
750#undef VCMP_DO
751#undef VCMP
752
0fa59364
RS
753#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
754void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
f7cc8466
SB
755 ppc_avr_t *a, ppc_avr_t *b) \
756{ \
757 etype ones = (etype)-1; \
758 etype all = ones; \
0fa59364 759 etype result, none = 0; \
f7cc8466
SB
760 int i; \
761 \
762 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
0fa59364
RS
763 if (cmpzero) { \
764 result = ((a->element[i] == 0) \
f7cc8466
SB
765 || (b->element[i] == 0) \
766 || (a->element[i] != b->element[i]) ? \
767 ones : 0x0); \
0fa59364
RS
768 } else { \
769 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
770 } \
f7cc8466
SB
771 r->element[i] = result; \
772 all &= result; \
773 none |= result; \
774 } \
775 if (record) { \
776 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
777 } \
778}
779
780/* VCMPNEZ - Vector compare not equal to zero
781 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
782 * element - element type to access from vector
783 */
0fa59364
RS
784#define VCMPNE(suffix, element, etype, cmpzero) \
785 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
786 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
787VCMPNE(zb, u8, uint8_t, 1)
788VCMPNE(zh, u16, uint16_t, 1)
789VCMPNE(zw, u32, uint32_t, 1)
790VCMPNE(b, u8, uint8_t, 0)
791VCMPNE(h, u16, uint16_t, 0)
792VCMPNE(w, u32, uint32_t, 0)
793#undef VCMPNE_DO
794#undef VCMPNE
f7cc8466 795
64654ded 796#define VCMPFP_DO(suffix, compare, order, record) \
d15f74fb
BS
797 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
798 ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
799 { \
800 uint32_t ones = (uint32_t)-1; \
801 uint32_t all = ones; \
802 uint32_t none = 0; \
803 int i; \
804 \
805 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
806 uint32_t result; \
807 int rel = float32_compare_quiet(a->f[i], b->f[i], \
808 &env->vec_status); \
809 if (rel == float_relation_unordered) { \
810 result = 0; \
811 } else if (rel compare order) { \
812 result = ones; \
813 } else { \
814 result = 0; \
815 } \
816 r->u32[i] = result; \
817 all &= result; \
818 none |= result; \
819 } \
820 if (record) { \
821 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
822 } \
823 }
824#define VCMPFP(suffix, compare, order) \
825 VCMPFP_DO(suffix, compare, order, 0) \
826 VCMPFP_DO(suffix##_dot, compare, order, 1)
827VCMPFP(eqfp, ==, float_relation_equal)
828VCMPFP(gefp, !=, float_relation_less)
829VCMPFP(gtfp, ==, float_relation_greater)
830#undef VCMPFP_DO
831#undef VCMPFP
832
d15f74fb
BS
833static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
834 ppc_avr_t *a, ppc_avr_t *b, int record)
64654ded
BS
835{
836 int i;
837 int all_in = 0;
838
839 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
840 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
841 if (le_rel == float_relation_unordered) {
842 r->u32[i] = 0xc0000000;
4007b8de 843 all_in = 1;
64654ded
BS
844 } else {
845 float32 bneg = float32_chs(b->f[i]);
846 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
847 int le = le_rel != float_relation_greater;
848 int ge = ge_rel != float_relation_less;
849
850 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
851 all_in |= (!le | !ge);
852 }
853 }
854 if (record) {
855 env->crf[6] = (all_in == 0) << 1;
856 }
857}
858
d15f74fb 859void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded 860{
d15f74fb 861 vcmpbfp_internal(env, r, a, b, 0);
64654ded
BS
862}
863
d15f74fb
BS
864void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
865 ppc_avr_t *b)
64654ded 866{
d15f74fb 867 vcmpbfp_internal(env, r, a, b, 1);
64654ded
BS
868}
869
870#define VCT(suffix, satcvt, element) \
d15f74fb
BS
871 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
872 ppc_avr_t *b, uint32_t uim) \
64654ded
BS
873 { \
874 int i; \
875 int sat = 0; \
876 float_status s = env->vec_status; \
877 \
878 set_float_rounding_mode(float_round_to_zero, &s); \
879 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
880 if (float32_is_any_nan(b->f[i])) { \
881 r->element[i] = 0; \
882 } else { \
883 float64 t = float32_to_float64(b->f[i], &s); \
884 int64_t j; \
885 \
886 t = float64_scalbn(t, uim, &s); \
887 j = float64_to_int64(t, &s); \
888 r->element[i] = satcvt(j, &sat); \
889 } \
890 } \
891 if (sat) { \
892 env->vscr |= (1 << VSCR_SAT); \
893 } \
894 }
895VCT(uxs, cvtsduw, u32)
896VCT(sxs, cvtsdsw, s32)
897#undef VCT
898
4879538c
RS
899target_ulong helper_vclzlsbb(ppc_avr_t *r)
900{
901 target_ulong count = 0;
902 int i;
903 VECTOR_FOR_INORDER_I(i, u8) {
904 if (r->u8[i] & 0x01) {
905 break;
906 }
907 count++;
908 }
909 return count;
910}
911
912target_ulong helper_vctzlsbb(ppc_avr_t *r)
913{
914 target_ulong count = 0;
915 int i;
916#if defined(HOST_WORDS_BIGENDIAN)
917 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
918#else
919 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
920#endif
921 if (r->u8[i] & 0x01) {
922 break;
923 }
924 count++;
925 }
926 return count;
927}
928
d15f74fb
BS
929void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
930 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
931{
932 int sat = 0;
933 int i;
934
935 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
936 int32_t prod = a->s16[i] * b->s16[i];
937 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
938
939 r->s16[i] = cvtswsh(t, &sat);
940 }
941
942 if (sat) {
943 env->vscr |= (1 << VSCR_SAT);
944 }
945}
946
d15f74fb
BS
947void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
949{
950 int sat = 0;
951 int i;
952
953 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
954 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
955 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
956 r->s16[i] = cvtswsh(t, &sat);
957 }
958
959 if (sat) {
960 env->vscr |= (1 << VSCR_SAT);
961 }
962}
963
964#define VMINMAX_DO(name, compare, element) \
965 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
966 { \
967 int i; \
968 \
969 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
970 if (a->element[i] compare b->element[i]) { \
971 r->element[i] = b->element[i]; \
972 } else { \
973 r->element[i] = a->element[i]; \
974 } \
975 } \
976 }
977#define VMINMAX(suffix, element) \
978 VMINMAX_DO(min##suffix, >, element) \
979 VMINMAX_DO(max##suffix, <, element)
980VMINMAX(sb, s8)
981VMINMAX(sh, s16)
982VMINMAX(sw, s32)
8203e31b 983VMINMAX(sd, s64)
64654ded
BS
984VMINMAX(ub, u8)
985VMINMAX(uh, u16)
986VMINMAX(uw, u32)
8203e31b 987VMINMAX(ud, u64)
64654ded
BS
988#undef VMINMAX_DO
989#undef VMINMAX
990
64654ded
BS
991void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
992{
993 int i;
994
995 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
996 int32_t prod = a->s16[i] * b->s16[i];
997 r->s16[i] = (int16_t) (prod + c->s16[i]);
998 }
999}
1000
1001#define VMRG_DO(name, element, highp) \
1002 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1003 { \
1004 ppc_avr_t result; \
1005 int i; \
1006 size_t n_elems = ARRAY_SIZE(r->element); \
1007 \
1008 for (i = 0; i < n_elems / 2; i++) { \
1009 if (highp) { \
1010 result.element[i*2+HI_IDX] = a->element[i]; \
1011 result.element[i*2+LO_IDX] = b->element[i]; \
1012 } else { \
1013 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
1014 b->element[n_elems - i - 1]; \
1015 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
1016 a->element[n_elems - i - 1]; \
1017 } \
1018 } \
1019 *r = result; \
1020 }
1021#if defined(HOST_WORDS_BIGENDIAN)
1022#define MRGHI 0
1023#define MRGLO 1
1024#else
1025#define MRGHI 1
1026#define MRGLO 0
1027#endif
1028#define VMRG(suffix, element) \
1029 VMRG_DO(mrgl##suffix, element, MRGHI) \
1030 VMRG_DO(mrgh##suffix, element, MRGLO)
1031VMRG(b, u8)
1032VMRG(h, u16)
1033VMRG(w, u32)
1034#undef VMRG_DO
1035#undef VMRG
1036#undef MRGHI
1037#undef MRGLO
1038
d15f74fb
BS
1039void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1040 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1041{
1042 int32_t prod[16];
1043 int i;
1044
1045 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1046 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1047 }
1048
1049 VECTOR_FOR_INORDER_I(i, s32) {
1050 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1051 prod[4 * i + 2] + prod[4 * i + 3];
1052 }
1053}
1054
d15f74fb
BS
1055void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1056 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1057{
1058 int32_t prod[8];
1059 int i;
1060
1061 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1062 prod[i] = a->s16[i] * b->s16[i];
1063 }
1064
1065 VECTOR_FOR_INORDER_I(i, s32) {
1066 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1067 }
1068}
1069
d15f74fb
BS
1070void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1071 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1072{
1073 int32_t prod[8];
1074 int i;
1075 int sat = 0;
1076
1077 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1078 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1079 }
1080
1081 VECTOR_FOR_INORDER_I(i, s32) {
1082 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1083
1084 r->u32[i] = cvtsdsw(t, &sat);
1085 }
1086
1087 if (sat) {
1088 env->vscr |= (1 << VSCR_SAT);
1089 }
1090}
1091
d15f74fb
BS
1092void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1093 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1094{
1095 uint16_t prod[16];
1096 int i;
1097
1098 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1099 prod[i] = a->u8[i] * b->u8[i];
1100 }
1101
1102 VECTOR_FOR_INORDER_I(i, u32) {
1103 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1104 prod[4 * i + 2] + prod[4 * i + 3];
1105 }
1106}
1107
d15f74fb
BS
1108void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1109 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1110{
1111 uint32_t prod[8];
1112 int i;
1113
1114 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1115 prod[i] = a->u16[i] * b->u16[i];
1116 }
1117
1118 VECTOR_FOR_INORDER_I(i, u32) {
1119 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1120 }
1121}
1122
d15f74fb
BS
1123void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1124 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1125{
1126 uint32_t prod[8];
1127 int i;
1128 int sat = 0;
1129
1130 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1131 prod[i] = a->u16[i] * b->u16[i];
1132 }
1133
1134 VECTOR_FOR_INORDER_I(i, s32) {
1135 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1136
1137 r->u32[i] = cvtuduw(t, &sat);
1138 }
1139
1140 if (sat) {
1141 env->vscr |= (1 << VSCR_SAT);
1142 }
1143}
1144
aa9e930c 1145#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
64654ded
BS
1146 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1147 { \
1148 int i; \
1149 \
1150 VECTOR_FOR_INORDER_I(i, prod_element) { \
1151 if (evenp) { \
aa9e930c
TM
1152 r->prod_element[i] = \
1153 (cast)a->mul_element[i * 2 + HI_IDX] * \
1154 (cast)b->mul_element[i * 2 + HI_IDX]; \
64654ded 1155 } else { \
aa9e930c
TM
1156 r->prod_element[i] = \
1157 (cast)a->mul_element[i * 2 + LO_IDX] * \
1158 (cast)b->mul_element[i * 2 + LO_IDX]; \
64654ded
BS
1159 } \
1160 } \
1161 }
aa9e930c
TM
1162#define VMUL(suffix, mul_element, prod_element, cast) \
1163 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1164 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1165VMUL(sb, s8, s16, int16_t)
1166VMUL(sh, s16, s32, int32_t)
63be0936 1167VMUL(sw, s32, s64, int64_t)
aa9e930c
TM
1168VMUL(ub, u8, u16, uint16_t)
1169VMUL(uh, u16, u32, uint32_t)
63be0936 1170VMUL(uw, u32, u64, uint64_t)
64654ded
BS
1171#undef VMUL_DO
1172#undef VMUL
1173
d15f74fb
BS
1174void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1175 ppc_avr_t *c)
64654ded
BS
1176{
1177 ppc_avr_t result;
1178 int i;
1179
1180 VECTOR_FOR_INORDER_I(i, u8) {
1181 int s = c->u8[i] & 0x1f;
1182#if defined(HOST_WORDS_BIGENDIAN)
1183 int index = s & 0xf;
1184#else
1185 int index = 15 - (s & 0xf);
1186#endif
1187
1188 if (s & 0x10) {
1189 result.u8[i] = b->u8[index];
1190 } else {
1191 result.u8[i] = a->u8[index];
1192 }
1193 }
1194 *r = result;
1195}
1196
ab045436
RS
1197void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1198 ppc_avr_t *c)
1199{
1200 ppc_avr_t result;
1201 int i;
1202
1203 VECTOR_FOR_INORDER_I(i, u8) {
1204 int s = c->u8[i] & 0x1f;
1205#if defined(HOST_WORDS_BIGENDIAN)
1206 int index = 15 - (s & 0xf);
1207#else
1208 int index = s & 0xf;
1209#endif
1210
1211 if (s & 0x10) {
1212 result.u8[i] = a->u8[index];
1213 } else {
1214 result.u8[i] = b->u8[index];
1215 }
1216 }
1217 *r = result;
1218}
1219
4d82038e
TM
1220#if defined(HOST_WORDS_BIGENDIAN)
1221#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
01fe9a47 1222#define VBPERMD_INDEX(i) (i)
4d82038e 1223#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
01fe9a47 1224#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
4d82038e
TM
1225#else
1226#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
01fe9a47 1227#define VBPERMD_INDEX(i) (1 - i)
4d82038e 1228#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
01fe9a47
RS
1229#define EXTRACT_BIT(avr, i, index) \
1230 (extract64((avr)->u64[1 - i], 63 - index, 1))
4d82038e
TM
1231#endif
1232
01fe9a47
RS
1233void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1234{
1235 int i, j;
1236 ppc_avr_t result = { .u64 = { 0, 0 } };
1237 VECTOR_FOR_INORDER_I(i, u64) {
1238 for (j = 0; j < 8; j++) {
1239 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1240 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1241 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1242 }
1243 }
1244 }
1245 *r = result;
1246}
1247
4d82038e
TM
1248void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1249{
1250 int i;
1251 uint64_t perm = 0;
1252
1253 VECTOR_FOR_INORDER_I(i, u8) {
1254 int index = VBPERMQ_INDEX(b, i);
1255
1256 if (index < 128) {
1257 uint64_t mask = (1ull << (63-(index & 0x3F)));
1258 if (a->u64[VBPERMQ_DW(index)] & mask) {
1259 perm |= (0x8000 >> i);
1260 }
1261 }
1262 }
1263
1264 r->u64[HI_IDX] = perm;
1265 r->u64[LO_IDX] = 0;
1266}
1267
1268#undef VBPERMQ_INDEX
1269#undef VBPERMQ_DW
1270
/*
 * Lookup table for vgbbd: VGBBD_MASKS[v] spreads the 8 bits of the
 * byte v across a uint64_t, placing bit k of v (bit 0 = LSB) at bit
 * position 8 * k + 7, i.e. at the MSB of result byte k.  The table is
 * generated with constant-expression macros instead of being spelled
 * out entry by entry.
 */
#define VGBBD_SPREAD(v)                                   \
    (((uint64_t)(((v) >> 0) & 1) << 7)  |                 \
     ((uint64_t)(((v) >> 1) & 1) << 15) |                 \
     ((uint64_t)(((v) >> 2) & 1) << 23) |                 \
     ((uint64_t)(((v) >> 3) & 1) << 31) |                 \
     ((uint64_t)(((v) >> 4) & 1) << 39) |                 \
     ((uint64_t)(((v) >> 5) & 1) << 47) |                 \
     ((uint64_t)(((v) >> 6) & 1) << 55) |                 \
     ((uint64_t)(((v) >> 7) & 1) << 63))
#define VGBBD_ROW(base)                                   \
    VGBBD_SPREAD((base) + 0x0), VGBBD_SPREAD((base) + 0x1), \
    VGBBD_SPREAD((base) + 0x2), VGBBD_SPREAD((base) + 0x3), \
    VGBBD_SPREAD((base) + 0x4), VGBBD_SPREAD((base) + 0x5), \
    VGBBD_SPREAD((base) + 0x6), VGBBD_SPREAD((base) + 0x7), \
    VGBBD_SPREAD((base) + 0x8), VGBBD_SPREAD((base) + 0x9), \
    VGBBD_SPREAD((base) + 0xA), VGBBD_SPREAD((base) + 0xB), \
    VGBBD_SPREAD((base) + 0xC), VGBBD_SPREAD((base) + 0xD), \
    VGBBD_SPREAD((base) + 0xE), VGBBD_SPREAD((base) + 0xF)
static const uint64_t VGBBD_MASKS[256] = {
    VGBBD_ROW(0x00), VGBBD_ROW(0x10), VGBBD_ROW(0x20), VGBBD_ROW(0x30),
    VGBBD_ROW(0x40), VGBBD_ROW(0x50), VGBBD_ROW(0x60), VGBBD_ROW(0x70),
    VGBBD_ROW(0x80), VGBBD_ROW(0x90), VGBBD_ROW(0xA0), VGBBD_ROW(0xB0),
    VGBBD_ROW(0xC0), VGBBD_ROW(0xD0), VGBBD_ROW(0xE0), VGBBD_ROW(0xF0)
};
#undef VGBBD_SPREAD
#undef VGBBD_ROW
1529
1530void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1531{
1532 int i;
1533 uint64_t t[2] = { 0, 0 };
1534
1535 VECTOR_FOR_INORDER_I(i, u8) {
1536#if defined(HOST_WORDS_BIGENDIAN)
1537 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1538#else
1539 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1540#endif
1541 }
1542
1543 r->u64[0] = t[0];
1544 r->u64[1] = t[1];
1545}
1546
b8476fc7
TM
1547#define PMSUM(name, srcfld, trgfld, trgtyp) \
1548void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1549{ \
1550 int i, j; \
1551 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1552 \
1553 VECTOR_FOR_INORDER_I(i, srcfld) { \
1554 prod[i] = 0; \
1555 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1556 if (a->srcfld[i] & (1ull<<j)) { \
1557 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1558 } \
1559 } \
1560 } \
1561 \
1562 VECTOR_FOR_INORDER_I(i, trgfld) { \
1563 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1564 } \
1565}
1566
1567PMSUM(vpmsumb, u8, u16, uint16_t)
1568PMSUM(vpmsumh, u16, u32, uint32_t)
1569PMSUM(vpmsumw, u32, u64, uint64_t)
1570
1571void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1572{
1573
1574#ifdef CONFIG_INT128
1575 int i, j;
1576 __uint128_t prod[2];
1577
1578 VECTOR_FOR_INORDER_I(i, u64) {
1579 prod[i] = 0;
1580 for (j = 0; j < 64; j++) {
1581 if (a->u64[i] & (1ull<<j)) {
1582 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1583 }
1584 }
1585 }
1586
1587 r->u128 = prod[0] ^ prod[1];
1588
1589#else
1590 int i, j;
1591 ppc_avr_t prod[2];
1592
1593 VECTOR_FOR_INORDER_I(i, u64) {
1594 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1595 for (j = 0; j < 64; j++) {
1596 if (a->u64[i] & (1ull<<j)) {
1597 ppc_avr_t bshift;
1598 if (j == 0) {
1599 bshift.u64[HI_IDX] = 0;
1600 bshift.u64[LO_IDX] = b->u64[i];
1601 } else {
1602 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1603 bshift.u64[LO_IDX] = b->u64[i] << j;
1604 }
1605 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1606 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1607 }
1608 }
1609 }
1610
1611 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1612 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1613#endif
1614}
1615
1616
64654ded
BS
1617#if defined(HOST_WORDS_BIGENDIAN)
1618#define PKBIG 1
1619#else
1620#define PKBIG 0
1621#endif
1622void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1623{
1624 int i, j;
1625 ppc_avr_t result;
1626#if defined(HOST_WORDS_BIGENDIAN)
1627 const ppc_avr_t *x[2] = { a, b };
1628#else
1629 const ppc_avr_t *x[2] = { b, a };
1630#endif
1631
1632 VECTOR_FOR_INORDER_I(i, u64) {
1633 VECTOR_FOR_INORDER_I(j, u32) {
1634 uint32_t e = x[i]->u32[j];
1635
1636 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1637 ((e >> 6) & 0x3e0) |
1638 ((e >> 3) & 0x1f));
1639 }
1640 }
1641 *r = result;
1642}
1643
1644#define VPK(suffix, from, to, cvt, dosat) \
d15f74fb
BS
1645 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1646 ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
1647 { \
1648 int i; \
1649 int sat = 0; \
1650 ppc_avr_t result; \
1651 ppc_avr_t *a0 = PKBIG ? a : b; \
1652 ppc_avr_t *a1 = PKBIG ? b : a; \
1653 \
1654 VECTOR_FOR_INORDER_I(i, from) { \
1655 result.to[i] = cvt(a0->from[i], &sat); \
1656 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1657 } \
1658 *r = result; \
1659 if (dosat && sat) { \
1660 env->vscr |= (1 << VSCR_SAT); \
1661 } \
1662 }
1663#define I(x, y) (x)
1664VPK(shss, s16, s8, cvtshsb, 1)
1665VPK(shus, s16, u8, cvtshub, 1)
1666VPK(swss, s32, s16, cvtswsh, 1)
1667VPK(swus, s32, u16, cvtswuh, 1)
024215b2
TM
1668VPK(sdss, s64, s32, cvtsdsw, 1)
1669VPK(sdus, s64, u32, cvtsduw, 1)
64654ded
BS
1670VPK(uhus, u16, u8, cvtuhub, 1)
1671VPK(uwus, u32, u16, cvtuwuh, 1)
024215b2 1672VPK(udus, u64, u32, cvtuduw, 1)
64654ded
BS
1673VPK(uhum, u16, u8, I, 0)
1674VPK(uwum, u32, u16, I, 0)
024215b2 1675VPK(udum, u64, u32, I, 0)
64654ded
BS
1676#undef I
1677#undef VPK
1678#undef PKBIG
1679
d15f74fb 1680void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1681{
1682 int i;
1683
1684 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1685 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
64654ded
BS
1686 }
1687}
1688
1689#define VRFI(suffix, rounding) \
d15f74fb
BS
1690 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1691 ppc_avr_t *b) \
64654ded
BS
1692 { \
1693 int i; \
1694 float_status s = env->vec_status; \
1695 \
1696 set_float_rounding_mode(rounding, &s); \
1697 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
ef9bd150 1698 r->f[i] = float32_round_to_int (b->f[i], &s); \
64654ded
BS
1699 } \
1700 }
1701VRFI(n, float_round_nearest_even)
1702VRFI(m, float_round_down)
1703VRFI(p, float_round_up)
1704VRFI(z, float_round_to_zero)
1705#undef VRFI
1706
818692ff 1707#define VROTATE(suffix, element, mask) \
64654ded
BS
1708 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1709 { \
1710 int i; \
1711 \
1712 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
64654ded
BS
1713 unsigned int shift = b->element[i] & mask; \
1714 r->element[i] = (a->element[i] << shift) | \
1715 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1716 } \
1717 }
818692ff
TM
1718VROTATE(b, u8, 0x7)
1719VROTATE(h, u16, 0xF)
1720VROTATE(w, u32, 0x1F)
2fdf78e6 1721VROTATE(d, u64, 0x3F)
64654ded
BS
1722#undef VROTATE
1723
d15f74fb 1724void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1725{
1726 int i;
1727
1728 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1729 float32 t = float32_sqrt(b->f[i], &env->vec_status);
64654ded 1730
ef9bd150 1731 r->f[i] = float32_div(float32_one, t, &env->vec_status);
64654ded
BS
1732 }
1733}
1734
09a245e1 1735#define VRLMI(name, size, element, insert) \
3e00884f
GS
1736void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1737{ \
1738 int i; \
1739 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1740 uint##size##_t src1 = a->element[i]; \
1741 uint##size##_t src2 = b->element[i]; \
1742 uint##size##_t src3 = r->element[i]; \
1743 uint##size##_t begin, end, shift, mask, rot_val; \
1744 \
1745 shift = extract##size(src2, 0, 6); \
1746 end = extract##size(src2, 8, 6); \
1747 begin = extract##size(src2, 16, 6); \
1748 rot_val = rol##size(src1, shift); \
1749 mask = mask_u##size(begin, end); \
09a245e1
BR
1750 if (insert) { \
1751 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1752 } else { \
1753 r->element[i] = (rot_val & mask); \
1754 } \
3e00884f
GS
1755 } \
1756}
1757
09a245e1
BR
1758VRLMI(vrldmi, 64, u64, 1);
1759VRLMI(vrlwmi, 32, u32, 1);
1760VRLMI(vrldnm, 64, u64, 0);
1761VRLMI(vrlwnm, 32, u32, 0);
3e00884f 1762
d15f74fb
BS
1763void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1764 ppc_avr_t *c)
64654ded
BS
1765{
1766 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1767 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1768}
1769
d15f74fb 1770void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1771{
1772 int i;
1773
1774 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1775 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
64654ded
BS
1776 }
1777}
1778
d15f74fb 1779void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1780{
1781 int i;
1782
1783 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1784 r->f[i] = float32_log2(b->f[i], &env->vec_status);
64654ded
BS
1785 }
1786}
1787
64654ded
BS
1788/* The specification says that the results are undefined if all of the
1789 * shift counts are not identical. We check to make sure that they are
1790 * to conform to what real hardware appears to do. */
1791#define VSHIFT(suffix, leftp) \
1792 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1793 { \
1794 int shift = b->u8[LO_IDX*15] & 0x7; \
1795 int doit = 1; \
1796 int i; \
1797 \
1798 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1799 doit = doit && ((b->u8[i] & 0x7) == shift); \
1800 } \
1801 if (doit) { \
1802 if (shift == 0) { \
1803 *r = *a; \
1804 } else if (leftp) { \
1805 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1806 \
1807 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1808 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1809 } else { \
1810 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1811 \
1812 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1813 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1814 } \
1815 } \
1816 }
24e669ba
TM
1817VSHIFT(l, 1)
1818VSHIFT(r, 0)
64654ded 1819#undef VSHIFT
64654ded 1820
818692ff 1821#define VSL(suffix, element, mask) \
64654ded
BS
1822 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1823 { \
1824 int i; \
1825 \
1826 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
64654ded
BS
1827 unsigned int shift = b->element[i] & mask; \
1828 \
1829 r->element[i] = a->element[i] << shift; \
1830 } \
1831 }
818692ff
TM
1832VSL(b, u8, 0x7)
1833VSL(h, u16, 0x0F)
1834VSL(w, u32, 0x1F)
2fdf78e6 1835VSL(d, u64, 0x3F)
64654ded
BS
1836#undef VSL
1837
5644a175
VAS
1838void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1839{
1840 int i;
1841 unsigned int shift, bytes, size;
1842
1843 size = ARRAY_SIZE(r->u8);
1844 for (i = 0; i < size; i++) {
1845 shift = b->u8[i] & 0x7; /* extract shift value */
1846 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1847 (((i + 1) < size) ? a->u8[i + 1] : 0);
1848 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1849 }
1850}
1851
4004c1db
VAS
1852void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1853{
1854 int i;
1855 unsigned int shift, bytes;
1856
1857 /* Use reverse order, as destination and source register can be same. Its
1858 * being modified in place saving temporary, reverse order will guarantee
1859 * that computed result is not fed back.
1860 */
1861 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1862 shift = b->u8[i] & 0x7; /* extract shift value */
1863 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1864 /* extract adjacent bytes */
1865 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1866 }
1867}
1868
64654ded
BS
1869void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1870{
1871 int sh = shift & 0xf;
1872 int i;
1873 ppc_avr_t result;
1874
1875#if defined(HOST_WORDS_BIGENDIAN)
1876 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1877 int index = sh + i;
1878 if (index > 0xf) {
1879 result.u8[i] = b->u8[index - 0x10];
1880 } else {
1881 result.u8[i] = a->u8[index];
1882 }
1883 }
1884#else
1885 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1886 int index = (16 - sh) + i;
1887 if (index > 0xf) {
1888 result.u8[i] = a->u8[index - 0x10];
1889 } else {
1890 result.u8[i] = b->u8[index];
1891 }
1892 }
1893#endif
1894 *r = result;
1895}
1896
1897void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1898{
1899 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1900
1901#if defined(HOST_WORDS_BIGENDIAN)
1902 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1903 memset(&r->u8[16-sh], 0, sh);
1904#else
1905 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1906 memset(&r->u8[0], 0, sh);
1907#endif
1908}
1909
1910/* Experimental testing shows that hardware masks the immediate. */
1911#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1912#if defined(HOST_WORDS_BIGENDIAN)
1913#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1914#else
1915#define SPLAT_ELEMENT(element) \
1916 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1917#endif
1918#define VSPLT(suffix, element) \
1919 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1920 { \
1921 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1922 int i; \
1923 \
1924 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1925 r->element[i] = s; \
1926 } \
1927 }
1928VSPLT(b, u8)
1929VSPLT(h, u16)
1930VSPLT(w, u32)
1931#undef VSPLT
1932#undef SPLAT_ELEMENT
1933#undef _SPLAT_MASKED
e7b1e06f
RS
1934#if defined(HOST_WORDS_BIGENDIAN)
1935#define VINSERT(suffix, element) \
1936 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1937 { \
1938 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \
1939 sizeof(r->element[0])); \
1940 }
1941#else
1942#define VINSERT(suffix, element) \
1943 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1944 { \
1945 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1946 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1947 }
1948#endif
1949VINSERT(b, u8)
1950VINSERT(h, u16)
1951VINSERT(w, u32)
1952VINSERT(d, u64)
1953#undef VINSERT
b5d569a1
RS
1954#if defined(HOST_WORDS_BIGENDIAN)
1955#define VEXTRACT(suffix, element) \
1956 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1957 { \
1958 uint32_t es = sizeof(r->element[0]); \
1959 memmove(&r->u8[8 - es], &b->u8[index], es); \
1960 memset(&r->u8[8], 0, 8); \
1961 memset(&r->u8[0], 0, 8 - es); \
1962 }
1963#else
1964#define VEXTRACT(suffix, element) \
1965 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1966 { \
1967 uint32_t es = sizeof(r->element[0]); \
1968 uint32_t s = (16 - index) - es; \
1969 memmove(&r->u8[8], &b->u8[s], es); \
1970 memset(&r->u8[0], 0, 8); \
1971 memset(&r->u8[8 + es], 0, 8 - es); \
1972 }
1973#endif
1974VEXTRACT(ub, u8)
1975VEXTRACT(uh, u16)
1976VEXTRACT(uw, u32)
1977VEXTRACT(d, u64)
1978#undef VEXTRACT
64654ded 1979
125a9b23
ND
1980#define VEXT_SIGNED(name, element, mask, cast, recast) \
1981void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1982{ \
1983 int i; \
1984 VECTOR_FOR_INORDER_I(i, element) { \
1985 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
1986 } \
1987}
1988VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
1989VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
1990VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
1991VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
1992VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
1993#undef VEXT_SIGNED
1994
cc8b6e76
ND
1995#define VNEG(name, element) \
1996void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1997{ \
1998 int i; \
1999 VECTOR_FOR_INORDER_I(i, element) { \
2000 r->element[i] = -b->element[i]; \
2001 } \
2002}
2003VNEG(vnegw, s32)
2004VNEG(vnegd, s64)
2005#undef VNEG
2006
64654ded
BS
2007#define VSPLTI(suffix, element, splat_type) \
2008 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2009 { \
2010 splat_type x = (int8_t)(splat << 3) >> 3; \
2011 int i; \
2012 \
2013 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2014 r->element[i] = x; \
2015 } \
2016 }
2017VSPLTI(b, s8, int8_t)
2018VSPLTI(h, s16, int16_t)
2019VSPLTI(w, s32, int32_t)
2020#undef VSPLTI
2021
818692ff 2022#define VSR(suffix, element, mask) \
64654ded
BS
2023 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2024 { \
2025 int i; \
2026 \
2027 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
64654ded 2028 unsigned int shift = b->element[i] & mask; \
64654ded
BS
2029 r->element[i] = a->element[i] >> shift; \
2030 } \
2031 }
818692ff
TM
2032VSR(ab, s8, 0x7)
2033VSR(ah, s16, 0xF)
2034VSR(aw, s32, 0x1F)
2fdf78e6 2035VSR(ad, s64, 0x3F)
818692ff
TM
2036VSR(b, u8, 0x7)
2037VSR(h, u16, 0xF)
2038VSR(w, u32, 0x1F)
2fdf78e6 2039VSR(d, u64, 0x3F)
64654ded
BS
2040#undef VSR
2041
2042void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2043{
2044 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2045
2046#if defined(HOST_WORDS_BIGENDIAN)
2047 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2048 memset(&r->u8[0], 0, sh);
2049#else
2050 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2051 memset(&r->u8[16 - sh], 0, sh);
2052#endif
2053}
2054
2055void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2056{
2057 int i;
2058
2059 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2060 r->u32[i] = a->u32[i] >= b->u32[i];
2061 }
2062}
2063
d15f74fb 2064void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
2065{
2066 int64_t t;
2067 int i, upper;
2068 ppc_avr_t result;
2069 int sat = 0;
2070
2071#if defined(HOST_WORDS_BIGENDIAN)
2072 upper = ARRAY_SIZE(r->s32)-1;
2073#else
2074 upper = 0;
2075#endif
2076 t = (int64_t)b->s32[upper];
2077 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2078 t += a->s32[i];
2079 result.s32[i] = 0;
2080 }
2081 result.s32[upper] = cvtsdsw(t, &sat);
2082 *r = result;
2083
2084 if (sat) {
2085 env->vscr |= (1 << VSCR_SAT);
2086 }
2087}
2088
d15f74fb 2089void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
2090{
2091 int i, j, upper;
2092 ppc_avr_t result;
2093 int sat = 0;
2094
2095#if defined(HOST_WORDS_BIGENDIAN)
2096 upper = 1;
2097#else
2098 upper = 0;
2099#endif
2100 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2101 int64_t t = (int64_t)b->s32[upper + i * 2];
2102
2103 result.u64[i] = 0;
2104 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2105 t += a->s32[2 * i + j];
2106 }
2107 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2108 }
2109
2110 *r = result;
2111 if (sat) {
2112 env->vscr |= (1 << VSCR_SAT);
2113 }
2114}
2115
d15f74fb 2116void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
2117{
2118 int i, j;
2119 int sat = 0;
2120
2121 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2122 int64_t t = (int64_t)b->s32[i];
2123
2124 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2125 t += a->s8[4 * i + j];
2126 }
2127 r->s32[i] = cvtsdsw(t, &sat);
2128 }
2129
2130 if (sat) {
2131 env->vscr |= (1 << VSCR_SAT);
2132 }
2133}
2134
d15f74fb 2135void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
2136{
2137 int sat = 0;
2138 int i;
2139
2140 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2141 int64_t t = (int64_t)b->s32[i];
2142
2143 t += a->s16[2 * i] + a->s16[2 * i + 1];
2144 r->s32[i] = cvtsdsw(t, &sat);
2145 }
2146
2147 if (sat) {
2148 env->vscr |= (1 << VSCR_SAT);
2149 }
2150}
2151
d15f74fb 2152void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
2153{
2154 int i, j;
2155 int sat = 0;
2156
2157 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2158 uint64_t t = (uint64_t)b->u32[i];
2159
2160 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2161 t += a->u8[4 * i + j];
2162 }
2163 r->u32[i] = cvtuduw(t, &sat);
2164 }
2165
2166 if (sat) {
2167 env->vscr |= (1 << VSCR_SAT);
2168 }
2169}
2170
2171#if defined(HOST_WORDS_BIGENDIAN)
2172#define UPKHI 1
2173#define UPKLO 0
2174#else
2175#define UPKHI 0
2176#define UPKLO 1
2177#endif
2178#define VUPKPX(suffix, hi) \
2179 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2180 { \
2181 int i; \
2182 ppc_avr_t result; \
2183 \
2184 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2185 uint16_t e = b->u16[hi ? i : i+4]; \
2186 uint8_t a = (e >> 15) ? 0xff : 0; \
2187 uint8_t r = (e >> 10) & 0x1f; \
2188 uint8_t g = (e >> 5) & 0x1f; \
2189 uint8_t b = e & 0x1f; \
2190 \
2191 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2192 } \
2193 *r = result; \
2194 }
2195VUPKPX(lpx, UPKLO)
2196VUPKPX(hpx, UPKHI)
2197#undef VUPKPX
2198
2199#define VUPK(suffix, unpacked, packee, hi) \
2200 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2201 { \
2202 int i; \
2203 ppc_avr_t result; \
2204 \
2205 if (hi) { \
2206 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2207 result.unpacked[i] = b->packee[i]; \
2208 } \
2209 } else { \
2210 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2211 i++) { \
2212 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2213 } \
2214 } \
2215 *r = result; \
2216 }
2217VUPK(hsb, s16, s8, UPKHI)
2218VUPK(hsh, s32, s16, UPKHI)
4430e076 2219VUPK(hsw, s64, s32, UPKHI)
64654ded
BS
2220VUPK(lsb, s16, s8, UPKLO)
2221VUPK(lsh, s32, s16, UPKLO)
4430e076 2222VUPK(lsw, s64, s32, UPKLO)
64654ded
BS
2223#undef VUPK
2224#undef UPKHI
2225#undef UPKLO
2226
f293f04a
TM
/*
 * Define a vector helper that applies the scalar operation "name"
 * independently to every element of b and stores the results in r.
 * Used below to build the per-element count-leading-zeros,
 * count-trailing-zeros and population-count helpers.
 */
#define VGENERIC_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        \
        VECTOR_FOR_INORDER_I(i, element) { \
            r->element[i] = name(b->element[i]); \
        } \
    }

/*
 * clz on sub-word elements: shift the value into the top bits of a
 * 32-bit word so clz32 counts only the element's own leading zeros;
 * an all-zero element yields the element width.
 */
#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

/* ctz on sub-word elements: an all-zero element yields the width. */
#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

/* Per-element population count. */
#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO
2283
b41da4eb
TM
/* The 128-bit value 1 as a pair of 64-bit halves in host word order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif
2289
2290#ifndef CONFIG_INT128
2291
2292static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2293{
2294 t->u64[0] = ~a.u64[0];
2295 t->u64[1] = ~a.u64[1];
2296}
2297
2298static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2299{
2300 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2301 return -1;
2302 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2303 return 1;
2304 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2305 return -1;
2306 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2307 return 1;
2308 } else {
2309 return 0;
2310 }
2311}
2312
2313static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2314{
2315 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2316 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2317 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2318}
2319
2320static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2321{
2322 ppc_avr_t not_a;
2323 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2324 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2325 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2326 avr_qw_not(&not_a, a);
2327 return avr_qw_cmpu(not_a, b) < 0;
2328}
2329
2330#endif
2331
/* vadduqm: 128-bit (quadword) add, modulo 2^128. */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}
2340
/*
 * vaddeuqm: 128-bit add extended - r = a + b + carry-in, where the
 * carry-in is the least significant bit of c.
 */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        /* Fold the carry-in into a first, then add b. */
        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}
2359
/*
 * vaddcuq: write the carry-out of the 128-bit add a + b (0 or 1)
 * into the low doubleword of r; the high doubleword is cleared.
 * a + b overflows 2^128 exactly when b > ~a.
 */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}
2373
/*
 * vaddecuq: carry-out of the extended 128-bit add a + b + carry-in
 * (low bit of c), written to the low doubleword of r.
 */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        /*
         * a + b did not carry by itself; the carry-in can only push it
         * over when a + b == 2^128 - 1, and a == b == 0 must be
         * excluded (0 + 0 + 1 == 1, no carry).
         */
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        /* Adding the carry-in may itself carry out of 128 bits. */
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
2399
/*
 * vsubuqm: 128-bit subtract, modulo 2^128, computed without INT128
 * support as a + ~b + 1 (two's complement).
 */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}
2413
/*
 * vsubeuqm: 128-bit subtract extended - r = a + ~b + carry-in, where
 * the carry-in is the least significant bit of c.
 */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    /* Add the carry-in as a 128-bit 0 or 1. */
    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}
2429
2430void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2431{
2432#ifdef CONFIG_INT128
2433 r->u128 = (~a->u128 < ~b->u128) ||
2434 (a->u128 + ~b->u128 == (__uint128_t)-1);
2435#else
2436 int carry = (avr_qw_cmpu(*a, *b) > 0);
2437 if (!carry) {
2438 ppc_avr_t tmp;
2439 avr_qw_not(&tmp, *b);
2440 avr_qw_add(&tmp, *a, tmp);
2441 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2442 }
2443 r->u64[HI_IDX] = 0;
2444 r->u64[LO_IDX] = carry;
2445#endif
2446}
2447
/*
 * vsubecuq: carry of the extended 128-bit subtract a + ~b + carry-in
 * (low bit of c), written to the low doubleword of r.
 */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        /* a + ~b is all-ones exactly when a == b; carry-in then carries. */
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
2468
e8f7b27b
TM
/*
 * BCD sign-code nibbles, stored in the low nibble of digit position 0
 * (see bcd_get_sgn below).  0xC/0xF/0xA/0xE encode plus, 0xD/0xB minus.
 */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1 0xA
#define BCD_NEG_PREF 0xD
#define BCD_NEG_ALT 0xB
#define BCD_PLUS_ALT_2 0xE
/* Sign code points for the "national" decimal format: ASCII '+'/'-'. */
#define NATIONAL_PLUS 0x2B
#define NATIONAL_NEG 0x2D

/* Byte of the vector holding BCD digit n, corrected for host order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n/2))
#else
#define BCD_DIG_BYTE(n) (n/2)
#endif
2483
2484static int bcd_get_sgn(ppc_avr_t *bcd)
2485{
2486 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2487 case BCD_PLUS_PREF_1:
2488 case BCD_PLUS_PREF_2:
2489 case BCD_PLUS_ALT_1:
2490 case BCD_PLUS_ALT_2:
2491 {
2492 return 1;
2493 }
2494
2495 case BCD_NEG_PREF:
2496 case BCD_NEG_ALT:
2497 {
2498 return -1;
2499 }
2500
2501 default:
2502 {
2503 return 0;
2504 }
2505 }
2506}
2507
2508static int bcd_preferred_sgn(int sgn, int ps)
2509{
2510 if (sgn >= 0) {
2511 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2512 } else {
2513 return BCD_NEG_PREF;
2514 }
2515}
2516
2517static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2518{
2519 uint8_t result;
2520 if (n & 1) {
2521 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2522 } else {
2523 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2524 }
2525
2526 if (unlikely(result > 9)) {
2527 *invalid = true;
2528 }
2529 return result;
2530}
2531
2532static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2533{
2534 if (n & 1) {
2535 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2536 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2537 } else {
2538 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2539 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2540 }
2541}
2542
b8155872
JRZ
/*
 * Compare a signed BCD value against zero, returning a CR field mask:
 * EQ when all 31 digits are zero (the ">> 4" skips the sign nibble),
 * otherwise GT or LT according to the sign nibble.
 */
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
        return 1 << CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
    }
}
2551
/*
 * Read 16-bit element n of a value in national decimal format; the
 * index is corrected for host endianness so both hosts address the
 * same element of the vector.
 */
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}
2560
e2106d73
JRZ
/*
 * Write 16-bit element n of a value in national decimal format;
 * mirror of get_national_digit above.
 */
static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}
2569
e8f7b27b
TM
2570static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2571{
2572 int i;
2573 int invalid = 0;
2574 for (i = 31; i > 0; i--) {
2575 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2576 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2577 if (unlikely(invalid)) {
3b163b01 2578 return 0; /* doesn't matter */
e8f7b27b
TM
2579 } else if (dig_a > dig_b) {
2580 return 1;
2581 } else if (dig_a < dig_b) {
2582 return -1;
2583 }
2584 }
2585
2586 return 0;
2587}
2588
/*
 * Add the magnitudes of a and b (digits 1..31) into t, one decimal
 * digit at a time with carry propagation.  Sets *invalid if either
 * source contains a non-decimal nibble and *overflow to the carry out
 * of the top digit.  Returns 1 if every result digit was zero,
 * 0 otherwise, or -1 as soon as invalid input is detected.
 */
static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
 int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
 bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        /* Decimal correction: 10..19 means carry into the next digit. */
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
2616
/*
 * Subtract the magnitude of b from that of a (digits 1..31) into t,
 * one decimal digit at a time with borrow (carry == -1).  Sets
 * *invalid on non-decimal input nibbles and *overflow to the final
 * borrow.  Returns 1 if every result digit was zero, 0 otherwise, or
 * -1 on invalid input.
 */
static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
 int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
 bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        /* The uint8_t difference went negative: borrow from next digit. */
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
2644
/*
 * bcdadd.: signed packed-BCD add.  Adds magnitudes when the signs
 * agree, otherwise subtracts the smaller magnitude from the larger
 * and keeps that operand's sign.  Returns the CR field: GT/LT from
 * the result sign, EQ for a zero result, SO on overflow, and SO alone
 * (with r set to all-ones) on invalid input.  ps selects the
 * preferred plus sign code.
 */
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            /* Same sign: |a| + |b|, keep the common sign. */
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            /* Opposite signs, |a| larger: |a| - |b| with a's sign. */
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else {
            /* Opposite signs, |b| >= |a|: |b| - |a| with b's sign. */
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1 << CRF_SO;
    } else if (overflow) {
        cr |= 1 << CRF_SO;
    } else if (zero) {
        cr = 1 << CRF_EQ;
    }

    *r = result;

    return cr;
}
2685
/*
 * bcdsub.: signed packed-BCD subtract, implemented by flipping b's
 * sign nibble and reusing the bcdadd logic.
 */
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}
f293f04a 2699
b8155872
JRZ
/*
 * bcdcfn.: convert from national decimal format to signed packed BCD.
 * Element 0 holds the sign ('+' 0x2B or '-' 0x2D); elements 1..7 must
 * be the characters '0'..'9' (0x30..0x39) and their low nibbles become
 * the BCD digits.  Returns the CR field from comparing the result
 * against zero, or SO on invalid input.
 */
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}
2735
e2106d73
JRZ
/*
 * bcdctn.: convert signed packed BCD to national decimal format.
 * Only BCD digits 1..7 fit; ox_flag notes non-zero digits above that
 * range (source bits beyond the low 32, past the sign nibble) and
 * raises SO in addition to the zero-compare CR bits.  SO alone is
 * returned on an invalid source sign or digit.
 */
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= 1 << CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}
2769
38f4cb04
JRZ
/*
 * bcdcfz.: convert from zoned decimal format to signed packed BCD.
 * Each source byte is zone-nibble:digit-nibble; the zone must match
 * zone_lead (0xF when ps, else 0x3) for every byte except byte 0,
 * whose zone nibble carries the sign.  With ps, sign codes 0xB/0xD
 * are negative; without, any zone with bit 2 set is negative.
 * Returns the zero-compare CR bits, or SO on invalid input.
 */
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        /* Byte 0's zone is the sign, so substitute the expected zone. */
        zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}
2813
0a890b31
JRZ
/*
 * bcdctz.: convert signed packed BCD to zoned decimal format.  Only
 * 16 digits fit; ox_flag notes non-zero digits above that (source
 * high doubleword past the sign nibble) and ORs SO into the
 * zero-compare CR bits.  The sign is encoded in the zone nibble of
 * byte 0: 0xC/0xD with ps, 0x3/0x7 without.  SO alone is returned on
 * invalid input.
 */
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= 1 << CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}
2856
/* vsbox: apply the AES S-box substitution to every byte of a. */
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}
2864
/*
 * vcipher: one AES encryption round on the state in a, via the
 * combined SubBytes/ShiftRows/MixColumns lookup tables AES_Te0..3,
 * XORed with the round key in b.  Staged in a local so r may alias
 * the inputs.
 */
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
    }
    *r = result;
}
2879
557d52fa
TM
/*
 * vcipherlast: final AES encryption round (SubBytes + ShiftRows,
 * no MixColumns) XORed with the round key in b.
 */
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}
2890
/*
 * vncipher: one AES decryption round - inverse ShiftRows and inverse
 * SubBytes into tmp (XORed with the key in b), then inverse
 * MixColumns via the AES_imc tables.  r is only written in the second
 * loop, which reads just tmp, so r may alias a or b.
 */
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4*i + 0)][0] ^
            AES_imc[tmp.AVRB(4*i + 1)][1] ^
            AES_imc[tmp.AVRB(4*i + 2)][2] ^
            AES_imc[tmp.AVRB(4*i + 3)][3];
    }
}
2910
/*
 * vncipherlast: final AES decryption round (inverse ShiftRows +
 * inverse SubBytes, no inverse MixColumns) XORed with the key in b.
 */
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}
2921
57354f8f
TM
2922#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2923#if defined(HOST_WORDS_BIGENDIAN)
2924#define EL_IDX(i) (i)
2925#else
2926#define EL_IDX(i) (3 - (i))
2927#endif
2928
2929void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2930{
2931 int st = (st_six & 0x10) != 0;
2932 int six = st_six & 0xF;
2933 int i;
2934
2935 VECTOR_FOR_INORDER_I(i, u32) {
2936 if (st == 0) {
2937 if ((six & (0x8 >> i)) == 0) {
2938 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2939 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2940 (a->u32[EL_IDX(i)] >> 3);
2941 } else { /* six.bit[i] == 1 */
2942 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2943 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2944 (a->u32[EL_IDX(i)] >> 10);
2945 }
2946 } else { /* st == 1 */
2947 if ((six & (0x8 >> i)) == 0) {
2948 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2949 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2950 ROTRu32(a->u32[EL_IDX(i)], 22);
2951 } else { /* six.bit[i] == 1 */
2952 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2953 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2954 ROTRu32(a->u32[EL_IDX(i)], 25);
2955 }
2956 }
2957 }
2958}
2959
2960#undef ROTRu32
2961#undef EL_IDX
2962
2963#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2964#if defined(HOST_WORDS_BIGENDIAN)
2965#define EL_IDX(i) (i)
2966#else
2967#define EL_IDX(i) (1 - (i))
2968#endif
2969
2970void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2971{
2972 int st = (st_six & 0x10) != 0;
2973 int six = st_six & 0xF;
2974 int i;
2975
2976 VECTOR_FOR_INORDER_I(i, u64) {
2977 if (st == 0) {
2978 if ((six & (0x8 >> (2*i))) == 0) {
2979 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2980 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2981 (a->u64[EL_IDX(i)] >> 7);
2982 } else { /* six.bit[2*i] == 1 */
2983 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2984 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2985 (a->u64[EL_IDX(i)] >> 6);
2986 }
2987 } else { /* st == 1 */
2988 if ((six & (0x8 >> (2*i))) == 0) {
2989 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2990 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2991 ROTRu64(a->u64[EL_IDX(i)], 39);
2992 } else { /* six.bit[2*i] == 1 */
2993 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2994 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2995 ROTRu64(a->u64[EL_IDX(i)], 41);
2996 }
2997 }
2998 }
2999}
3000
3001#undef ROTRu64
3002#undef EL_IDX
3003
ac174549
TM
/*
 * vpermxor: for each byte, the two nibbles of c select one byte from
 * a and one from b, which are XORed into the result.  On little-
 * endian hosts the indices are mirrored (15 - index) so both hosts
 * select the same architectural byte.  Staged in a local so r may
 * alias any input.
 */
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
#endif
    }
    *r = result;
}
3020
64654ded
BS
3021#undef VECTOR_FOR_INORDER_I
3022#undef HI_IDX
3023#undef LO_IDX
3024
3025/*****************************************************************************/
3026/* SPE extension helpers */
3027/* Use a table to make this quicker */
/* Bit-reversal of a nibble, by table lookup. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte: reverse each nibble and swap them. */
static inline uint8_t byte_reverse(uint8_t val)
{
    uint8_t from_hi = hbrev[val >> 4];
    uint8_t from_lo = hbrev[val & 0xF];

    return from_hi | (from_lo << 4);
}
3037
/*
 * Reverse the bit order of a 32-bit word: bit-reverse each byte and
 * swap the byte positions.
 *
 * Each byte_reverse() result is cast to uint32_t before shifting:
 * uint8_t promotes to (signed) int, and left-shifting a promoted
 * value with bit 7 set by 24 would overflow int, which is undefined
 * behavior (C11 6.5.7).
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
3043
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
/*
 * brinc: bit-reversed increment.  The low MASKBITS bits of arg1 are
 * incremented as if their bit order were reversed, under the mask in
 * arg2; bits of arg1 above the mask are preserved.  NOTE(review):
 * presumably used for bit-reversed (e.g. FFT) addressing per the SPE
 * spec - confirm against the ISA document.
 */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    /* Reverse, add one (carry rippling the "wrong" way), reverse back. */
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
3055
/*
 * Count leading sign bits of a 32-bit word: leading ones for a
 * negative value (counted as leading zeros of the complement),
 * leading zeros otherwise.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    return clz32((val & 0x80000000) ? ~val : val);
}
3064
/* Count leading zeros of a 32-bit word. */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
3069
/* 440 specific */
/*
 * dlmzb: determine leftmost zero byte in the 8-byte string formed by
 * high:low.  Returns the 1-based count of bytes preceding (and
 * including position of) the first zero byte, or 8 when no zero byte
 * is found; the count is also written to XER[0:6].  When update_Rc is
 * set, CR0 is written: 0x4 = zero found in high, 0x8 = zero found in
 * low, 0x2 = no zero byte, always ORed with the SO bit.
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    /* Scan the four bytes of "high", most significant first. */
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    /* Then the four bytes of "low". */
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    /* No zero byte: the count is capped at 8. */
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}