/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

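/*
 * Note (added summary, not from the original source): helper_divweu and
 * helper_divwe, plus the 64-bit helper_divdeu/helper_divde below, implement
 * the PowerISA "divide extended" instructions. The word forms divide
 * (RA || 32 zero bits) by RB, so the quotient can overflow even for a
 * nonzero divisor; on overflow the architected result is undefined and
 * these helpers return 0.
 */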
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

target_ulong helper_cnttzw(target_ulong t)
{
    return ctz32(t);
}

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
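
/*
 * Worked example (illustrative values, not from the ISA): with
 * rb = 0x11AB223344556677 and ra = 0xAB, rb ^ pattern(0xAB) zeroes the
 * matching byte. In haszero(), subtracting pattern(0x01) borrows that
 * byte down to 0xFF, ~v keeps it 0xFF, and the masked 0x80 bit
 * survives, so hasvalue() is nonzero exactly when some byte of rb
 * equals ra and cmpeqb sets CRF_GT.
 */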

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}

target_ulong helper_cnttzd(target_ulong t)
{
    return ctz64(t);
}
#endif

#if defined(TARGET_PPC64)

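/*
 * bpermd (bit permute doubleword): each of the eight bytes of rs selects
 * one bit of rb in big-endian bit numbering (bit 0 is the MSB, hence the
 * 63 - index shift); the selected bits are gathered into the low byte of
 * the result, and indexes >= 64 yield 0.
 */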
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63 - index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
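/*
 * Added note: for sraw/srad the carry bit (CA) is set only when the
 * source is negative and nonzero bits are shifted out, i.e. when the
 * arithmetic shift rounded away from zero. CA is what makes the classic
 * srawi-plus-addze idiom for signed division by a power of two work.
 */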
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

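/*
 * Added note: the bit-slice additions above deliberately stop early.
 * popcntb keeps independent per-byte counts and popcntw per-word counts,
 * matching the granularity those instructions report, while popcntd
 * (64-bit targets only) sums the whole doubleword via ctpop64.
 */
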
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element) \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element) \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

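/*
 * Added note: HI_IDX/LO_IDX select the architecturally high/low
 * doubleword of a 128-bit AVR regardless of host byte order, and
 * VECTOR_FOR_INORDER_I walks elements in big-endian (architectural)
 * order on either kind of host.
 */
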
/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
        \
        if (x < (from_type)min) { \
            r = min; \
            *sat = 1; \
        } else if (x > (from_type)max) { \
            r = max; \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
#define SATCVTU(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
        \
        if (x > (from_type)max) { \
            r = max; \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

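/*
 * Added note: the lvsl/lvsr helpers above fill the result with the byte
 * sequences sh..sh+15 and 16-sh..31-sh, i.e. the permute control vectors
 * that classic AltiVec unaligned-access sequences feed into vperm.
 */
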
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

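/*
 * Added note on the carry-out trick used below (and again in the 128-bit
 * helpers): for unsigned a and b, a + b overflows iff b > ~a (that is,
 * b > UINT_MAX - a), so "~a < b" computes the carry-out of the addition
 * without widening to a larger type.
 */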
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

#define VARITH_DO(name, op, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = a->element[i] op b->element[i]; \
        } \
    }
#define VARITH(suffix, element) \
    VARITH_DO(add##suffix, +, element) \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func) \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
        } \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type) \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c) \
    { \
        int i; \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
                                     type, &env->vec_status); \
        } \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

64654ded
BS
564#define VARITHSAT_CASE(type, op, cvt, element) \
565 { \
566 type result = (type)a->element[i] op (type)b->element[i]; \
567 r->element[i] = cvt(result, &sat); \
568 }
569
570#define VARITHSAT_DO(name, op, optype, cvt, element) \
d15f74fb
BS
571 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
572 ppc_avr_t *b) \
64654ded
BS
573 { \
574 int sat = 0; \
575 int i; \
576 \
577 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
578 switch (sizeof(r->element[0])) { \
579 case 1: \
580 VARITHSAT_CASE(optype, op, cvt, element); \
581 break; \
582 case 2: \
583 VARITHSAT_CASE(optype, op, cvt, element); \
584 break; \
585 case 4: \
586 VARITHSAT_CASE(optype, op, cvt, element); \
587 break; \
588 } \
589 } \
590 if (sat) { \
591 env->vscr |= (1 << VSCR_SAT); \
592 } \
593 }
594#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
595 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
596 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
597#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
598 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
599 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
600VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
601VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
602VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
603VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
604VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
605VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
606#undef VARITHSAT_CASE
607#undef VARITHSAT_DO
608#undef VARITHSAT_SIGNED
609#undef VARITHSAT_UNSIGNED
610
#define VAVG_DO(name, element, etype) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
            r->element[i] = x >> 1; \
        } \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element, \
             unsigned_type) \
    VAVG_DO(avgs##type, signed_element, signed_type) \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VCF(suffix, cvt, element) \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *b, uint32_t uim) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
            float32 t = cvt(b->element[i], &env->vec_status); \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
        } \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record) \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *a, ppc_avr_t *b) \
    { \
        uint64_t ones = (uint64_t)-1; \
        uint64_t all = ones; \
        uint64_t none = 0; \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            uint64_t result = (a->element[i] compare b->element[i] ? \
                               ones : 0x0); \
            switch (sizeof(a->element[0])) { \
            case 8: \
                r->u64[i] = result; \
                break; \
            case 4: \
                r->u32[i] = result; \
                break; \
            case 2: \
                r->u16[i] = result; \
                break; \
            case 1: \
                r->u8[i] = result; \
                break; \
            } \
            all &= result; \
            none |= result; \
        } \
        if (record) { \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
        } \
    }
#define VCMP(suffix, compare, element) \
    VCMP_DO(suffix, compare, element, 0) \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPFP_DO(suffix, compare, order, record) \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *a, ppc_avr_t *b) \
    { \
        uint32_t ones = (uint32_t)-1; \
        uint32_t all = ones; \
        uint32_t none = 0; \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
            uint32_t result; \
            int rel = float32_compare_quiet(a->f[i], b->f[i], \
                                            &env->vec_status); \
            if (rel == float_relation_unordered) { \
                result = 0; \
            } else if (rel compare order) { \
                result = ones; \
            } else { \
                result = 0; \
            } \
            r->u32[i] = result; \
            all &= result; \
            none |= result; \
        } \
        if (record) { \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
        } \
    }
#define VCMPFP(suffix, compare, order) \
    VCMPFP_DO(suffix, compare, order, 0) \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element) \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *b, uint32_t uim) \
    { \
        int i; \
        int sat = 0; \
        float_status s = env->vec_status; \
        \
        set_float_rounding_mode(float_round_to_zero, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
            if (float32_is_any_nan(b->f[i])) { \
                r->element[i] = 0; \
            } else { \
                float64 t = float32_to_float64(b->f[i], &s); \
                int64_t j; \
                \
                t = float64_scalbn(t, uim, &s); \
                j = float64_to_int64(t, &s); \
                r->element[i] = satcvt(j, &sat); \
            } \
        } \
        if (sat) { \
            env->vscr |= (1 << VSCR_SAT); \
        } \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            if (a->element[i] compare b->element[i]) { \
                r->element[i] = b->element[i]; \
            } else { \
                r->element[i] = a->element[i]; \
            } \
        } \
    }
#define VMINMAX(suffix, element) \
    VMINMAX_DO(min##suffix, >, element) \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        ppc_avr_t result; \
        int i; \
        size_t n_elems = ARRAY_SIZE(r->element); \
        \
        for (i = 0; i < n_elems / 2; i++) { \
            if (highp) { \
                result.element[i * 2 + HI_IDX] = a->element[i]; \
                result.element[i * 2 + LO_IDX] = b->element[i]; \
            } else { \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
                    b->element[n_elems - i - 1]; \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
                    a->element[n_elems - i - 1]; \
            } \
        } \
        *r = result; \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element) \
    VMRG_DO(mrgl##suffix, element, MRGHI) \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        VECTOR_FOR_INORDER_I(i, prod_element) { \
            if (evenp) { \
                r->prod_element[i] = \
                    (cast)a->mul_element[i * 2 + HI_IDX] * \
                    (cast)b->mul_element[i * 2 + HI_IDX]; \
            } else { \
                r->prod_element[i] = \
                    (cast)a->mul_element[i * 2 + LO_IDX] * \
                    (cast)b->mul_element[i * 2 + LO_IDX]; \
            } \
        } \
    }
#define VMUL(suffix, mul_element, prod_element, cast) \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

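/*
 * Added note: vperm treats a:b as a 32-byte array in big-endian element
 * order; the low 5 bits of each byte of c pick the source byte, which is
 * why the little-endian path below mirrors the index within each vector.
 */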
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

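/*
 * Added note: lookup table for vgbbd below. Entry b spreads the 8 bits of
 * b across the most-significant bit of each byte of a doubleword (bit i
 * of b lands in byte i), so the per-doubleword 8x8 bit-matrix transpose
 * becomes eight table lookups plus shifts.
 */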
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

#define PMSUM(name, srcfld, trgfld, trgtyp) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
    int i, j; \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
    \
    VECTOR_FOR_INORDER_I(i, srcfld) { \
        prod[i] = 0; \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
            if (a->srcfld[i] & (1ull << j)) { \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
            } \
        } \
    } \
    \
    VECTOR_FOR_INORDER_I(i, trgfld) { \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
    } \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

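/*
 * Added note: vpmsum* is a carry-less (GF(2) polynomial) multiply;
 * partial products are combined with XOR instead of addition, and
 * adjacent even/odd lanes are XOR-summed into the wider target element.
 * This kind of primitive is commonly used for table-free CRC and
 * GCM-style multiplication.
 */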
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64 - j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat) \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        int sat = 0; \
        ppc_avr_t result; \
        ppc_avr_t *a0 = PKBIG ? a : b; \
        ppc_avr_t *a1 = PKBIG ? b : a; \
        \
        VECTOR_FOR_INORDER_I(i, from) { \
            result.to[i] = cvt(a0->from[i], &sat); \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
        } \
        *r = result; \
        if (dosat && sat) { \
            env->vscr |= (1 << VSCR_SAT); \
        } \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding) \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *b) \
    { \
        int i; \
        float_status s = env->vec_status; \
        \
        set_float_rounding_mode(rounding, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
            r->f[i] = float32_round_to_int(b->f[i], &s); \
        } \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask) \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            unsigned int shift = b->element[i] & mask; \
            r->element[i] = (a->element[i] << shift) | \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        } \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

/* The specification says that the results are undefined if all of the
 * shift counts are not identical. We check to make sure that they are
 * to conform to what real hardware appears to do. */
#define VSHIFT(suffix, leftp) \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int shift = b->u8[LO_IDX * 15] & 0x7; \
        int doit = 1; \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
            doit = doit && ((b->u8[i] & 0x7) == shift); \
        } \
        if (doit) { \
            if (shift == 0) { \
                *r = *a; \
            } else if (leftp) { \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
                \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
            } else { \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
                \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
            } \
        } \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

#define VSL(suffix, element, mask) \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            unsigned int shift = b->element[i] & mask; \
            \
            r->element[i] = a->element[i] << shift; \
        } \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

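/*
 * Added note: vslo above and vsro below shift the whole vector left or
 * right by octets. The 4-bit shift count lives in bits 121:124 of vB
 * (byte 15 of the architected layout), which is why the helpers read
 * b->u8[LO_IDX * 0xf] and extract bits 3..6 of that byte.
 */
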
/* Experimental testing shows that hardware masks the immediate. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element) \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element) \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    { \
        uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = s; \
        } \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#define VSPLTI(suffix, element, splat_type) \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
    { \
        splat_type x = (int8_t)(splat << 3) >> 3; \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = x; \
        } \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

#define VSR(suffix, element, mask) \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            unsigned int shift = b->element[i] & mask; \
            r->element[i] = a->element[i] >> shift; \
        } \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32) - 1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
            uint16_t e = b->u16[hi ? i : i + 4]; \
            uint8_t a = (e >> 15) ? 0xff : 0; \
            uint8_t r = (e >> 10) & 0x1f; \
            uint8_t g = (e >> 5) & 0x1f; \
            uint8_t b = e & 0x1f; \
            \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        } \
        *r = result; \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
        \
        if (hi) { \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
                result.unpacked[i] = b->packee[i]; \
            } \
        } else { \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) { \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            } \
        } \
        *r = result; \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        \
        VECTOR_FOR_INORDER_I(i, element) { \
            r->element[i] = name(b->element[i]); \
        } \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

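/*
 * Added note: without __int128 support, 128-bit quantities are modeled
 * as pairs of u64. avr_qw_add computes the low-half carry with the same
 * "~lo_a < lo_b" unsigned-overflow test used by vaddcuw, and avr_qw_addc
 * derives the overall carry-out from an unsigned compare against ~a.
 */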
static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
        (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
        (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
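
/* With a carry-in of 1, a carry-out also occurs when the 128-bit sum
 * a + b equals 2^128 - 1 (all ones); the wraparound test
 * (a + b + 1) == 0 in the INT128 path detects exactly that case.
 */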

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}
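
/* The non-INT128 path uses the two's complement identity
 * a - b == a + ~b + 1, built from the quadword add helpers above.
 */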

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}
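
/* For subtraction the returned "carry" is the absence of a borrow:
 * it is 1 exactly when a >= b, i.e. when a + ~b + 1 carries out of
 * 128 bits.
 */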

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1 0xA
#define BCD_NEG_PREF 0xD
#define BCD_NEG_ALT 0xB
#define BCD_PLUS_ALT_2 0xE

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
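
/* Packed BCD values hold 31 decimal digits in nibble positions 1..31,
 * with the sign code in the low-order nibble (digit position 0);
 * BCD_DIG_BYTE maps a digit position to its host byte index.
 */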

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
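
/* Valid digits are 0..9, so after the uint8_t subtraction a borrow
 * shows up as wraparound into the 0x80 bit, which the check above uses
 * to propagate carry = -1 into the next digit position.
 */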

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1 << CRF_SO;
    } else if (overflow) {
        cr |= 1 << CRF_SO;
    } else if (zero) {
        cr = 1 << CRF_EQ;
    }

    *r = result;

    return cr;
}
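
/* The value returned above is a 4-bit CR field image: GT for a positive
 * result, LT for a negative one, EQ for zero, and SO for overflow or an
 * invalid BCD operand.
 */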

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}
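
/* The AES_Te0..Te3 tables each combine SubBytes with one rotated
 * MixColumns column, so a single XOR of four table entries per word
 * performs SubBytes, ShiftRows (via AES_shifts) and MixColumns; the
 * XOR with b is the AddRoundKey step.
 */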

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.                      */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}
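
/* Per element, the four cases above are the SHA-256 sigma functions:
 * st == 0 selects the message-schedule sigmas (sigma0: rotates 7, 18,
 * shift 3; sigma1: rotates 17, 19, shift 10), st == 1 the big Sigmas of
 * the compression rounds (Sigma0: 2, 13, 22; Sigma1: 6, 11, 25).
 */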

#undef ROTRu32
#undef EL_IDX

#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}
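
/* These are the corresponding SHA-512 functions on 64-bit elements:
 * sigma0 (rotates 1, 8, shift 7), sigma1 (rotates 19, 61, shift 6),
 * Sigma0 (28, 34, 39) and Sigma1 (14, 18, 41).
 */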

#undef ROTRu64
#undef EL_IDX

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}
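
/* Each result byte XORs one byte selected from a by the high nibble of
 * the control byte with one selected from b by the low nibble; on
 * little-endian hosts the 15 - index flip converts the instruction's
 * big-endian element numbering into host byte order.
 */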

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
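
/* hbrev reverses the bit order of a nibble, so byte_reverse and
 * word_reverse reverse the bit order of a byte and of a 32-bit word
 * respectively, e.g. word_reverse(0x80000000) == 0x00000001.
 */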

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
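
/* brinc implements the SPE bit-reversed increment, as used for
 * FFT-style addressing: within the bits selected by arg2's mask, the
 * bit-reversed value of arg1 is incremented, reversed back, and merged
 * into arg1.
 */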

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}
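
/* cntlsw32 counts leading sign bits: leading ones for a negative value
 * (via clz32 of the complement), leading zeros otherwise.
 */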

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
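
/* dlmzb scans the eight bytes of high:low most-significant byte first,
 * writing the 1-based index of the leftmost zero byte (8 if there is
 * none) into the low bits of XER; with update_Rc, CR0 additionally
 * encodes whether the zero byte was found in high, in low, or not at
 * all.
 */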