/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
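
/* Worked example (illustrative, not from the original source): divweu
 * computes (RA || 32 zero bits) / RB.  With ra = 1, rb = 4:
 *   dividend = 0x0000000100000000, divisor = 4, rt = 0x40000000
 * and no overflow.  With ra = 1, rb = 1 the quotient would be
 * 0x100000000 > UINT32_MAX, so the result is treated as undefined and
 * SO/OV are set when oe is non-zero.
 */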

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

target_ulong helper_cnttzw(target_ulong t)
{
    return ctz32(t);
}

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, AND with the inverse, then check if the MSB
 * is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

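/* Worked example (illustrative): searching for the byte 0x42 in
 * x = 0x1122334242556677 with hasvalue(x, 0x42):
 *   x ^ pattern(0x42) = 0x5360710000172435  -- matching bytes become 0x00
 * haszero() then flags the zero bytes via the
 * (v - 0x01...) & ~v & 0x80... trick, giving a non-zero result, so
 * helper_cmpeqb() returns 1 << CRF_GT.
 */
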
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}

target_ulong helper_cnttzd(target_ulong t)
{
    return ctz64(t);
}

/* Return invalid random number.
 *
 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 * random number
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i*8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63-index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

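/* Worked example (illustrative): bpermd gathers eight bits of rb, chosen
 * by the index bytes of rs, into the low byte of the result.  Byte i of
 * rs (counted from the least-significant end) holds an index 0..63 of a
 * bit of rb counted from the most-significant end; that bit becomes bit
 * i of ra.  E.g. if the low byte of rs is 0, bit 0 of ra is the MSB of
 * rb.  Indexes >= 64 contribute a zero bit.
 */
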
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

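/* Worked example (illustrative): sraw of value = 0xFFFFFFF5 (-11) by
 * shift = 2 yields ret = -3 (rounding toward minus infinity).  The two
 * discarded bits (value & 3 = 1) are non-zero and the result is
 * negative, so CA is set, allowing a following add-with-carry sequence
 * to correct the quotient toward zero.
 */
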
#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

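/* Worked example (illustrative) of the divide-and-conquer steps above,
 * on the byte 0b10110100 (four bits set):
 *   2-bit fields: 10 11 01 00 -> 01 10 01 00   (per-pair bit counts)
 *   4-bit fields: 0110 0100   -> 0011 0001     (3 and 1 bits set)
 *   8-bit step:   0011 + 0001 -> 00000100      (4 bits set in the byte)
 * After the 4-bit step each byte holds its own population count, which
 * is exactly what popcntb returns; popcntw keeps folding to wider fields.
 */
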
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

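/* A minimal sketch (an assumption about how mfrom_table.c could be
 * generated; it is not the generator shipped with QEMU) of the formula
 * above, using a hypothetical helper:
 *
 *   static uint32_t mfrom_entry(int arg)
 *   {
 *       return (uint32_t)(256.0 * log10(pow(10.0, -arg / 256.0) + 1.0)
 *                         + 0.5);
 *   }
 *
 * e.g. mfrom_entry(0) == 77, since 256 * log10(2) + 0.5 is about 77.56.
 */
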
/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

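/* Worked example (illustrative): cvtsdsw() clamps a signed 64-bit value
 * to the signed 32-bit range, e.g.
 *   int sat = 0;
 *   int32_t v = cvtsdsw(0x123456789ll, &sat);   -- v = INT32_MAX, sat = 1
 * The sticky "sat" flag is what callers accumulate into VSCR[SAT].
 */
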
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->u64[LO_IDX] = res & 1;
    r->u64[HI_IDX] = 0;
}

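/* Worked example (illustrative): the XOR folds above compute the parity
 * of the least-significant bit of each byte (per the vprtyb* semantics),
 * not of every bit.  For b->u64[i] = 0x0000000000010001 the two byte
 * LSBs cancel and the result is 0; for 0x0000000000000001 it is 1.
 */
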
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
                        (a->element[i] - b->element[i]) :               \
                        (b->element[i] - a->element[i]);                \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                           ppc_avr_t *a, ppc_avr_t *b)                  \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                      || (b->element[i] == 0)                           \
                      || (a->element[i] != b->element[i]) ?             \
                      ones : 0x0);                                      \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63-(index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

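/* Illustrative note (an interpretation, not text from the source):
 * vgbbd effectively transposes an 8x8 bit matrix within each doubleword,
 * gathering bit j of every byte.  VGBBD_MASKS[v] records where the bits
 * of a source byte with value v land: e.g. VGBBD_MASKS[0x01] is
 * 0x0000000000000080ull, placing the lone set LSB at the MSB of the
 * lowest byte, which the loop then shifts by the byte's position.
 */
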
#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];                \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull<<j)) {                             \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];                         \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}

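/* Worked example (illustrative): vpmsum* performs carry-less
 * (polynomial, GF(2)) multiplication.  For one byte lane,
 * a = 0x03 (x + 1) times b = 0x05 (x^2 + 1):
 *   0x05 ^ (0x05 << 1) = 0x05 ^ 0x0A = 0x0F   (x^3 + x^2 + x + 1)
 * Shifted copies of b are XORed in for each set bit of a, with no
 * carries between columns; products of adjacent lanes are then XORed.
 */
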
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *b)                              \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_round_to_int(b->f[i], &s);                \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end = extract##size(src2, 8, 6);                                \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

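/* Worked example (illustrative): a 128-bit left shift by 4 with
 * a->u64[HI_IDX] = 0x0123456789abcdef, a->u64[LO_IDX] = 0xfedcba9876543210
 * propagates carry = LO >> 60 = 0xf into the high doubleword:
 *   HI becomes 0x123456789abcdeff, LO becomes 0xedcba98765432100.
 */
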
#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as the destination and source registers can be
     * the same.  Since the register is modified in place (saving a
     * temporary), processing in reverse order guarantees that the
     * computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                              /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                            \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                     \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                  \
        int i;                                                            \
                                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
            r->element[i] = s;                                            \
        }                                                                 \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

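/* Illustrative example (not from the original source): the extracted
 * element lands zero-extended in the high doubleword of the result,
 * with the index counted in PowerPC byte order.  Hypothetical
 * INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_vextractub(void)
{
    ppc_avr_t b, r;

    b.u64[HI_IDX] = 0x0011223344556677ull;  /* PPC bytes 0..7 */
    b.u64[LO_IDX] = 0x8899AABBCCDDEEFFull;  /* PPC bytes 8..15 */

    /* PPC byte 3 of b is 0x33. */
    helper_vextractub(&r, &b, 3);
    assert(r.u64[HI_IDX] == 0x33);
    assert(r.u64[LO_IDX] == 0);
}
#endif
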
#define VEXT_SIGNED(name, element, mask, cast, recast)              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
#undef VEXT_SIGNED

#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

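/* Illustrative example (not from the original source): the
 * (int8_t)(splat << 3) >> 3 idiom above sign-extends the 5-bit
 * SIMM field.  Hypothetical INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_vspltis_sign_extend(void)
{
    ppc_avr_t r;

    /* 0x1F is the 5-bit encoding of -1. */
    helper_vspltisb(&r, 0x1F);
    assert(r.s8[0] == -1 && r.s8[15] == -1);

    /* 15 is the largest positive immediate. */
    helper_vspltisb(&r, 15);
    assert(r.s8[0] == 15);
}
#endif
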
#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32) - 1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                      \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)        \
    {                                                           \
        int i;                                                  \
        ppc_avr_t result;                                       \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {              \
            uint16_t e = b->u16[hi ? i : i + 4];                \
            uint8_t a = (e >> 15) ? 0xff : 0;                   \
            uint8_t r = (e >> 10) & 0x1f;                       \
            uint8_t g = (e >> 5) & 0x1f;                        \
            uint8_t b = e & 0x1f;                               \
                                                                \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                       \
        *r = result;                                            \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

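/* Illustrative example (not from the original source): VUPKPX expands
 * a 1-5-5-5 pixel to 8-8-8-8 by replicating the 1-bit alpha and
 * zero-extending the 5-bit channels.  Hypothetical
 * INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_vupkhpx(void)
{
    ppc_avr_t b, r;
    int i;

    for (i = 0; i < 8; i++) {
        b.u16[i] = 0xFFFF;      /* 1-5-5-5 pixel with all fields set */
    }

    helper_vupkhpx(&r, &b);
    for (i = 0; i < 4; i++) {
        assert(r.u32[i] == 0xFF1F1F1F);
    }
}
#endif
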
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)             \
    {                                                           \
        int i;                                                  \
                                                                \
        VECTOR_FOR_INORDER_I(i, element) {                      \
            r->element[i] = name(b->element[i]);                \
        }                                                       \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}

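/* Illustrative example (not from the original source): the carry term
 * in avr_qw_add relies on a + b overflowing 64 bits exactly when
 * b > ~a, i.e. when b > UINT64_MAX - a.  Hypothetical
 * INT_HELPER_EXAMPLES guard; this sits on the !CONFIG_INT128 path
 * that defines avr_qw_add.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_avr_qw_add_carry(void)
{
    ppc_avr_t a, b, t;

    a.u64[HI_IDX] = 0;
    a.u64[LO_IDX] = UINT64_MAX;
    b.u64[HI_IDX] = 0;
    b.u64[LO_IDX] = 1;

    /* The low doubleword wraps to 0 and the carry propagates. */
    avr_qw_add(&t, a, b);
    assert(t.u64[LO_IDX] == 0);
    assert(t.u64[HI_IDX] == 1);
}
#endif
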
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n / 2))
#else
#define BCD_DIG_BYTE(n) (n / 2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

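/* Illustrative example (not from the original source): digit 0 is the
 * sign nibble, and digit n of the 31-digit quantity lives in the
 * nibble selected by BCD_DIG_BYTE(n).  Hypothetical
 * INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_bcd_digits(void)
{
    int invalid = 0;
    ppc_avr_t v = { .u64 = { 0, 0 } };

    /* Encode +42: sign nibble, then units, then tens. */
    bcd_put_digit(&v, BCD_PLUS_PREF_1, 0);
    bcd_put_digit(&v, 2, 1);
    bcd_put_digit(&v, 4, 2);

    assert(bcd_get_sgn(&v) == 1);
    assert(bcd_get_digit(&v, 1, &invalid) == 2);
    assert(bcd_get_digit(&v, 2, &invalid) == 4);
    assert(!invalid);
}
#endif
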
static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
        return 1 << CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1 << CRF_SO;
    } else if (overflow) {
        cr |= 1 << CRF_SO;
    } else if (zero) {
        cr = 1 << CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

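/* Illustrative example (not from the original source): when the signs
 * differ, bcdadd subtracts the smaller magnitude from the larger and
 * keeps the sign of the larger operand.  Hypothetical
 * INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_bcdadd(void)
{
    ppc_avr_t a = { .u64 = { 0, 0 } };
    ppc_avr_t b = { .u64 = { 0, 0 } };
    ppc_avr_t r;
    int invalid = 0;
    uint32_t cr;

    /* a = +5, b = -3 */
    bcd_put_digit(&a, BCD_PLUS_PREF_1, 0);
    bcd_put_digit(&a, 5, 1);
    bcd_put_digit(&b, BCD_NEG_PREF, 0);
    bcd_put_digit(&b, 3, 1);

    cr = helper_bcdadd(&r, &a, &b, 0);

    /* +5 + (-3) = +2, reported as greater than zero. */
    assert(bcd_get_digit(&r, 1, &invalid) == 2);
    assert(bcd_get_sgn(&r) == 1);
    assert(cr == 1 << CRF_GT);
}
#endif
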
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= 1 << CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= 1 << CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = 1 << CRF_SO;
    }

    *r = ret;

    return cr;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}

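/* Illustrative example (not from the original source): the rotation
 * and shift constants above are the SHA-256 sigma functions of FIPS
 * 180-4; st selects the big-Sigma variants, and each bit of six
 * selects the 0/1 subscript per word.  Hypothetical
 * INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_vshasigmaw(void)
{
    ppc_avr_t a, r;
    int i;

    for (i = 0; i < 4; i++) {
        a.u32[i] = 1;
    }

    /* st = 0, six = 0: sigma0 on every word.
     * sigma0(1) = ROTR(1, 7) ^ ROTR(1, 18) ^ (1 >> 3)
     *           = 0x02000000 ^ 0x00004000 ^ 0 = 0x02004000. */
    helper_vshasigmaw(&r, &a, 0);
    for (i = 0; i < 4; i++) {
        assert(r.u32[i] == 0x02004000);
    }
}
#endif
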
#undef ROTRu32
#undef EL_IDX

#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

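/* Illustrative example (not from the original source): brinc performs
 * a bit-reversed increment over the low MASKBITS bits, the address
 * pattern used for FFT-style buffers.  Hypothetical
 * INT_HELPER_EXAMPLES guard.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_brinc(void)
{
    /* Counting up in bit-reversed order from 0 over a 16-bit mask
     * visits 0x8000, 0x4000, 0xC000, ... */
    assert(helper_brinc(0x0000, 0xFFFF) == 0x8000);
    assert(helper_brinc(0x8000, 0xFFFF) == 0x4000);
    assert(helper_brinc(0x4000, 0xFFFF) == 0xC000);
}
#endif
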
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
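
/* Illustrative example (not from the original source): dlmzb scans the
 * eight bytes formed by the 32-bit high:low pair and returns the
 * number of bytes up to and including the leftmost zero byte, or 8 if
 * none is found.  Hypothetical INT_HELPER_EXAMPLES guard; g_new0() is
 * used only to obtain a zeroed CPUPPCState for the call.
 */
#ifdef INT_HELPER_EXAMPLES
static void example_dlmzb(void)
{
    CPUPPCState *env = g_new0(CPUPPCState, 1);

    /* 'A', 'B', then a zero byte: three bytes scanned. */
    assert(helper_dlmzb(env, 0x41420000, 0x00000000, 0) == 3);

    /* No zero byte in either word: the count saturates at 8. */
    assert(helper_dlmzb(env, 0x11111111, 0x22222222, 0) == 8);

    g_free(env);
}
#endif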