/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
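
/*
 * Worked example (illustration only, values derived from the code above):
 * divweu computes (ra << 32) / rb.  With ra = 1, rb = 2 the dividend is
 * 0x1_0000_0000 and the quotient 0x8000_0000 fits in 32 bits, so OV is
 * cleared when oe is set.  With ra = 2, rb = 2 the quotient is
 * 0x1_0000_0000, which does not fit: the result is the "undefined" value 0
 * and, with oe set, OV and SO are raised.  In general the divide overflows
 * exactly when (uint32_t)ra >= (uint32_t)rb.
 */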

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
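
/*
 * Worked example (illustration only): hasvalue(0x1122334455667788, 0x66)
 * XORs pattern(0x66) = 0x6666666666666666 into the operand, turning the
 * matching byte into 0x00; haszero() then flags that byte, so the result
 * is non-zero and cmpeqb below reports a match.
 */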

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/* Return invalid random number.
 *
 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 * random number
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63 - index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}
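
/*
 * Illustration (derived from the code above): each byte of rs is an index
 * into rb, using PowerPC bit numbering where bit 0 is the MSB.  E.g. an
 * index byte of 0x00 selects the MSB of rb; index values >= 64 always
 * contribute a 0 bit to the result.
 */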

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}
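
/*
 * Worked example (illustration only): with rs = 0x1122334455667788 and
 * rb = 0x11AA33445566BB88, the operands differ only in bytes 1 and 6
 * (counting the least significant byte as 0), so cmpb returns
 * ra = 0xFF00FFFFFFFF00FF.
 */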

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
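
/*
 * Worked example (illustration only): cvtsdsw(0x1_0000_0000, &sat) clamps
 * the 64-bit input to INT32_MAX (0x7FFFFFFF) and sets *sat, which the
 * callers below translate into VSCR[SAT].
 */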

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->u64[LO_IDX] = res & 1;
    r->u64[HI_IDX] = 0;
}
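
/*
 * Note on the folding above (derived from the code): each vprtyb* helper
 * XOR-folds the element until bit 0 holds the parity of the least
 * significant bits of the element's bytes; e.g. for a word w the result
 * is w[0] ^ w[8] ^ w[16] ^ w[24] (little-endian bit numbers).
 */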

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
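
/*
 * Note (derived from the code): adding 1 before the shift makes the
 * average round up on ties, e.g. vavgub with inputs 1 and 2 yields
 * (1 + 2 + 1) >> 1 = 2.
 */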

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
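
/*
 * CR6 encoding used by the "record" forms above: 0b1000 when the
 * comparison is true for every element, 0b0010 when it is true for none,
 * and 0b0000 otherwise.
 */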

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                           ppc_avr_t *a, ppc_avr_t *b)                  \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                      || (b->element[i] == 0)                           \
                      || (a->element[i] != b->element[i]) ?             \
                      ones : 0x0);                                      \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i * 2 + HI_IDX] = a->element[i];         \
                result.element[i * 2 + LO_IDX] = b->element[i];         \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
    (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}
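
/*
 * Note (derived from the table and code above): vgbbd in effect performs a
 * bit-matrix transpose of each doubleword viewed as an 8x8 array of bits,
 * so bit j of output byte i comes from bit i of input byte j.
 * VGBBD_MASKS[v] spreads the 8 bits of v across the MSBs of the 8 bytes of
 * a doubleword, and the shift by (i & 7) selects the output column.
 */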

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64 - j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}
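
/*
 * Note (derived from the code): vpmsumd is a carry-less (XOR-based, i.e.
 * polynomial over GF(2)) multiply-sum: each doubleword of a is multiplied
 * carry-lessly by the corresponding doubleword of b, and the two 128-bit
 * products are XORed together.  E.g. carry-less 0b11 * 0b11 = 0b101, since
 * (x + 1)^2 = x^2 + 1 over GF(2).
 */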

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG
d15f74fb 1668void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1669{
1670 int i;
1671
1672 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
ef9bd150 1673 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
64654ded
BS
1674 }
1675}
1676
1677#define VRFI(suffix, rounding) \
d15f74fb
BS
1678 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1679 ppc_avr_t *b) \
64654ded
BS
1680 { \
1681 int i; \
1682 float_status s = env->vec_status; \
1683 \
1684 set_float_rounding_mode(rounding, &s); \
1685 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
ef9bd150 1686 r->f[i] = float32_round_to_int (b->f[i], &s); \
64654ded
BS
1687 } \
1688 }
1689VRFI(n, float_round_nearest_even)
1690VRFI(m, float_round_down)
1691VRFI(p, float_round_up)
1692VRFI(z, float_round_to_zero)
1693#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end = extract##size(src2, 8, 6);                                \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);
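
/*
 * Illustration (derived from the code above): for vrlwmi, each word of b
 * packs a rotate count (bits 0:5), a mask end (bits 8:13) and a mask begin
 * (bits 16:21); the corresponding word of a is rotated left and inserted
 * under mask_u32(begin, end).  The "nm" variants clear, rather than
 * preserve, the destination bits outside the mask.
 */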

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * identical, to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX * 15] & 0x7;                           \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT
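
/*
 * Illustration (derived from the code above): vsl/vsr shift the whole
 * 128-bit vector by 0-7 bits, composed from two 64-bit halves.  E.g. a
 * left shift by 3 computes hi = (hi << 3) | (lo >> 61) and lo = lo << 3.
 */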

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Walk in reverse order, as the destination and source registers can
     * be the same.  Modifying in place saves a temporary, and the reverse
     * order guarantees that no computed result is fed back into the input.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                              /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}
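
/*
 * Illustration (derived from the code above): vslo shifts the vector left
 * by sh whole bytes, where sh comes from bits 3:6 of the least significant
 * byte of b, and zero-fills from the right; e.g. sh = 1 discards the most
 * significant byte and moves every other byte up one position.
 */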

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                            \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                     \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                  \
        int i;                                                            \
                                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
            r->element[i] = s;                                            \
        }                                                                 \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)],              \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

125a9b23
ND
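/* Worked example: helper_vextractuw(r, b, 4) reads the word at ISA byte
 * offset 4 of b and stores it zero-extended and right-justified in r's
 * doubleword 0; doubleword 1 of r is cleared.
 */
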
#define VEXT_SIGNED(name, element, mask, cast, recast)              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
#undef VEXT_SIGNED

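/* The mask-and-cast pair performs the sign extension: for vextsb2w an input
 * word 0x00000080 is masked to 0x80, cast to int8_t (-128), then widened to
 * int32_t, giving 0xFFFFFF80.
 */
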
#define VNEG(name, element)                     \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)  \
{                                               \
    int i;                                      \
    VECTOR_FOR_INORDER_I(i, element) {          \
        r->element[i] = -b->element[i];         \
    }                                           \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

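/* The expression (int8_t)(splat << 3) >> 3 sign-extends the 5-bit SIMM
 * field: splat = 0x1F becomes (int8_t)0xF8 >> 3 = -1, so vspltisb 31 fills
 * the register with 0xFF bytes.
 */
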
#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

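/* One macro body serves both instruction families: the signed element
 * instantiations make ">>" an arithmetic shift (vsrab..vsrad), while the
 * unsigned ones give the logical shifts (vsrb..vsrd).
 */
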
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32) - 1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

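/* All the vsum* helpers accumulate in 64 bits and saturate only on the
 * final conversion: e.g. vsum4sbs with four 0x7F bytes and b->s32[i] ==
 * INT32_MAX accumulates INT32_MAX + 508, which cvtsdsw() clamps back to
 * INT32_MAX while flagging VSCR[SAT].
 */
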
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i + 4];                        \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

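/* vupk{h,l}px expand 1:5:5:5 pixels to 8:8:8:8: e.g. e = 0x7C00 has sign
 * bit 0, r = 0x1F, g = 0, b = 0, and unpacks to 0x001F0000.
 */
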
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)     \
    {                                                   \
        int i;                                          \
                                                        \
        VECTOR_FOR_INORDER_I(i, element) {              \
            r->element[i] = name(b->element[i]);        \
        }                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

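/* clz32() counts leading zeros of a 32-bit value, so the narrower types are
 * left-aligned first: clzb(0x10) == clz32(0x10000000) == 3, with the zero
 * case pinned to the element width (clzb(0) == 8).
 */
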
#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}
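
/* The carry term above relies on a + b overflowing 64 bits exactly when
 * b > UINT64_MAX - a, and UINT64_MAX - a == ~a, so (~a_lo < b_lo) is the
 * carry out of the low doubleword.
 */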

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

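/* Illustrative sketch (not part of the original helpers): chaining the
 * modulo and carry forms recovers a wider addition, which is what the
 * vadduqm/vaddcuq/vaddeuqm split is designed for:
 */
#if 0   /* example only */
static void add256_example(ppc_avr_t *r_lo, ppc_avr_t *r_hi,
                           ppc_avr_t *a_lo, ppc_avr_t *a_hi,
                           ppc_avr_t *b_lo, ppc_avr_t *b_hi)
{
    ppc_avr_t carry;

    helper_vadduqm(r_lo, a_lo, b_lo);           /* low 128 bits of the sum */
    helper_vaddcuq(&carry, a_lo, b_lo);         /* carry out of the low half */
    helper_vaddeuqm(r_hi, a_hi, b_hi, &carry);  /* high 128 bits plus carry */
}
#endif
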
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

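/* The subtract helpers use a - b == a + ~b + 1: vsubcuq's "carry" is the
 * borrow complement, i.e. 1 when a >= b, and the extended forms thread the
 * incoming carry bit (c & 1) through the same identity.
 */
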
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n / 2))
#else
#define BCD_DIG_BYTE(n) (n / 2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

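/* Digit layout, for illustration: the packed value +123 ends in the nibbles
 * 1 2 3 C, where digit 0 (the low nibble, BCD_DIG_BYTE(0)) holds the sign
 * code and digit n sits n nibbles above it, so bcd_get_digit() returns
 * 3, 2, 1 for n = 1, 2, 3.
 */
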
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

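/* Decimal carry propagation, for illustration: adding digits 7 + 5 yields
 * 12, stored as digit 2 with carry 1 into the next position; in
 * bcd_sub_mag() a negative difference wraps the uint8_t (bit 0x80 set) and
 * is corrected by adding 10 with a borrow of -1.
 */
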
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? CRF_GT : CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr = CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

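/* helper_bcdsub() deliberately duplicates no magnitude logic: it flips the
 * sign nibble of a copy of b and reuses helper_bcdadd(), so a - b is
 * evaluated as a + (-b), including all invalid-operand handling.
 */
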
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

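/* "National" decimal format: each of the eight halfwords holds a 16-bit
 * character code, '0'..'9' (0x30..0x39) for the seven digits and '+' (0x2B)
 * or '-' (0x2D) in halfword 0 for the sign, hence the masking with 0xf to
 * recover a digit's value.
 */
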
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

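/* Zoned decimal stores one digit per byte as zone-nibble:digit-nibble.
 * With ps == 0 the zone is the ASCII value 0x3 and the sign is carried in
 * bit 0x4 of the sign byte's zone; with ps == 1 the zone is 0xF and the
 * EBCDIC-style codes 0xB/0xD mark a negative value.
 */
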
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->s64[HI_IDX] < 0) {
        lo_value = -b->s64[LO_IDX];
        hi_value = ~b->u64[HI_IDX] + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->u64[LO_IDX];
        hi_value = b->u64[HI_IDX];
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

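/* The 128-bit magnitude is split with a single divu128() by 10^15: the
 * remainder (left in hi_value) supplies BCD digits 1..15 and the quotient
 * (in lo_value) digits 16..31, with CRF_SO flagging a quotient that does
 * not fit in those sixteen digits.
 */
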
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->s64[LO_IDX] = -lo_value;
        r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
    } else {
        r->s64[LO_IDX] = lo_value;
        r->s64[HI_IDX] = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

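/* The digit loop is a 128-bit Horner evaluation, value = value * 10 +
 * digit, with mulu64() producing the 64x64 partial products and the carry
 * out of the low half folded into the high half.  The final negation is
 * the usual -x == ~x + 1 spread across both halves.
 */
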
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

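/* The AES_Te0..Te3 lookups are the standard T-table formulation of an AES
 * round: each table folds SubBytes and MixColumns for one byte lane, the
 * AES_shifts[] indexing applies ShiftRows, and the XOR with b supplies
 * AddRoundKey.  vcipherlast below omits MixColumns, as the final round
 * requires.
 */
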
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}

#undef ROTRu32
#undef EL_IDX

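/* These are the four SHA-256 sigma functions, selected per element by st
 * and six: st == 0 gives the message-schedule sigmas (sigma0 = ROTR7 ^
 * ROTR18 ^ SHR3, sigma1 = ROTR17 ^ ROTR19 ^ SHR10), st == 1 the
 * compression-function ones (Sigma0 = ROTR2 ^ ROTR13 ^ ROTR22, Sigma1 =
 * ROTR6 ^ ROTR11 ^ ROTR25).  vshasigmad below is the SHA-512 counterpart.
 */
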
#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

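/* hbrev maps each nibble to its bit reversal (0x1 -> 0x8, 0x6 -> 0x6, ...),
 * so byte_reverse() reverses a byte by reversing and swapping its two
 * nibbles, and word_reverse() reverses 32 bits by combining byte_reverse()
 * with a byte swap.
 */
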
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

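/* brinc implements the bit-reversed increment used for FFT addressing:
 * reverse the masked index, add 1, reverse again.  The a | ~b term forces
 * bits outside the buffer mask to one so the carry propagates straight
 * through them.
 */
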
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

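/* cntlsw32() counts leading sign bits: a negative value is complemented
 * first so its leading ones are counted, e.g. 0xFFFF0000 gives 16.
 */
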
/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
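
/* dlmzb scans the eight bytes of high:low for the leftmost zero byte and
 * returns its 1-based position (8 when there is none) in the low seven bits
 * of XER; with update_Rc, CR0 encodes where the zero byte was found: GT for
 * the high word, LT for the low word, EQ for none, plus a copy of XER[SO].
 */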