/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

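/* Divide Word Extended Unsigned: the dividend is RA concatenated with 32
 * zero bits. The quotient is undefined (forced to 0 here) if it does not
 * fit in 32 bits or if the divisor is 0; with OE set, SO/OV record that.
 */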
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

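/* Divide Doubleword Extended Unsigned: the 128-bit dividend is formed from
 * RA in the high half and zeros in the low half, and divided by RB via
 * divu128(). As above, an overflowing quotient is forced to 0 and reported
 * through SO/OV when OE is set.
 */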
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif


target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

target_ulong helper_cnttzw(target_ulong t)
{
    return ctz32(t);
}

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

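/* cmpeqb: set the CRF_GT bit if any byte of rb equals the low byte of ra,
 * e.g. hasvalue(0x1122334455667788, 0x55) is non-zero.
 */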
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}

target_ulong helper_cnttzd(target_ulong t)
{
    return ctz64(t);
}

/* Return an invalid random number.
 *
 * FIXME: Add an RNG backend or another mechanism to get cryptographically
 * suitable random numbers.
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

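/* bpermd: each of the 8 index bytes of rs selects one bit of rb (index 0
 * naming the MSB, IBM bit numbering); the selected bits are gathered into
 * the low byte of the result.
 */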
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63 - index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

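/* cmpb: for each byte position, set the corresponding result byte to
 * all-ones if rs and rb agree on that byte, and to zero otherwise.
 */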
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
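/* CA is set when the shift discards a non-zero bit from a negative value,
 * i.e. whenever the truncated result differs from the exact quotient.
 */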
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

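/* popcntb/popcntw: classic SWAR bit-count reductions. Each step adds
 * adjacent groups of bits in parallel; popcntb stops once every byte holds
 * its own population count, popcntw continues down to word granularity.
 */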
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
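/* These divides use the MQ SPR: helper_div/helper_divo form the 64-bit
 * dividend from RA concatenated with MQ; all four leave the remainder in
 * MQ (0 on overflow) and return INT32_MIN on divide-by-zero or
 * INT32_MIN / -1.
 */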
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* Real implementation uses a ROM table. Do the same. */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif
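/* VECTOR_FOR_INORDER_I walks the elements in PowerPC (left-to-right)
 * element order regardless of host byte order, iterating backwards on
 * little-endian hosts.
 */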
/* Saturating arithmetic helpers. */
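/* The generated converters are named cvt<from><to>, e.g. cvtsdsw narrows a
 * signed doubleword to a signed word, clamping to INT32_MIN/INT32_MAX and
 * flagging *sat when it does so.
 */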
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

64654ded
BS
580#define VARITHSAT_CASE(type, op, cvt, element) \
581 { \
582 type result = (type)a->element[i] op (type)b->element[i]; \
583 r->element[i] = cvt(result, &sat); \
584 }
585
586#define VARITHSAT_DO(name, op, optype, cvt, element) \
d15f74fb
BS
587 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
588 ppc_avr_t *b) \
64654ded
BS
589 { \
590 int sat = 0; \
591 int i; \
592 \
593 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
594 switch (sizeof(r->element[0])) { \
595 case 1: \
596 VARITHSAT_CASE(optype, op, cvt, element); \
597 break; \
598 case 2: \
599 VARITHSAT_CASE(optype, op, cvt, element); \
600 break; \
601 case 4: \
602 VARITHSAT_CASE(optype, op, cvt, element); \
603 break; \
604 } \
605 } \
606 if (sat) { \
607 env->vscr |= (1 << VSCR_SAT); \
608 } \
609 }
610#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
611 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
612 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
613#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
614 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
615 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
616VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
617VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
618VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
619VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
620VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
621VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
622#undef VARITHSAT_CASE
623#undef VARITHSAT_DO
624#undef VARITHSAT_SIGNED
625#undef VARITHSAT_UNSIGNED
626
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

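/* The record (".") forms encode the outcome in CR field 6: bit 3 is set
 * when every element compared true, bit 1 when none did.
 */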
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                           ppc_avr_t *a, ppc_avr_t *b)                  \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                      || (b->element[i] == 0)                           \
                      || (a->element[i] != b->element[i]) ?             \
                      ones : 0x0);                                      \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i * 2 + HI_IDX] = a->element[i];         \
                result.element[i * 2 + LO_IDX] = b->element[i];         \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

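/* vmsum*: multiply corresponding elements, then horizontally add the
 * partial products of each word lane together with the matching word of c;
 * the "s" variants saturate the accumulated sum.
 */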
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)           \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)   \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

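/* vperm selects each result byte from the 32-byte concatenation of a and
 * b: the low 5 bits of each byte of c index into (a || b). vpermr does the
 * same but indexes from the opposite end of the concatenation.
 */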
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

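/* vbpermd/vbpermq: each index byte of b selects a single bit of a (IBM bit
 * numbering, so index 0 names the MSB); the gathered bits are packed into
 * the result.
 */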
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

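/* VGBBD_MASKS[b] spreads the 8 bits of b so that bit k of b lands in the
 * MSB of byte k, e.g. VGBBD_MASKS[0x03] = 0x0000000000008080ull.
 */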
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

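/* PMSUM: polynomial (carry-less) multiply of each pair of source elements,
 * XOR-summing adjacent products into the wider target element -- the
 * GF(2)[x] arithmetic used, e.g., by CRC and GCM computations.
 */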
#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64 - j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                          \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *b)                              \
    {                                                                   \
        int i;                                                          \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(rounding, &s);                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_round_to_int(b->f[i], &s);                \
        }                                                               \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check that they are, to conform
 * to what real hardware appears to do.
 */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX * 15] & 0x7;                           \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as the destination and source registers can be
     * the same. The register is modified in place, saving a temporary,
     * and the reverse order guarantees that a computed result is not fed
     * back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;                 /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                                /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
    }
}

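/* vsldoi: concatenate a and b and extract 16 bytes starting at byte offset
 * "shift", i.e. a left shift of the 32-byte pair by whole octets.
 */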
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)],              \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT
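/* The (int8_t)(splat << 3) >> 3 dance sign-extends the 5-bit SIMM field
 * before it is replicated into every element.
 */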
#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

818692ff 1952#define VSR(suffix, element, mask) \
64654ded
BS
1953 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1954 { \
1955 int i; \
1956 \
1957 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
64654ded 1958 unsigned int shift = b->element[i] & mask; \
64654ded
BS
1959 r->element[i] = a->element[i] >> shift; \
1960 } \
1961 }
818692ff
TM
1962VSR(ab, s8, 0x7)
1963VSR(ah, s16, 0xF)
1964VSR(aw, s32, 0x1F)
2fdf78e6 1965VSR(ad, s64, 0x3F)
818692ff
TM
1966VSR(b, u8, 0x7)
1967VSR(h, u16, 0xF)
1968VSR(w, u32, 0x1F)
2fdf78e6 1969VSR(d, u64, 0x3F)
64654ded
BS
1970#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}
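
/*
 * Editor's note (not part of the original source): vsubcuw stores the
 * carry-out of the word-wise subtraction a + ~b + 1, which is 1 exactly
 * when a->u32[i] >= b->u32[i], e.g.
 *
 *     a->u32[i] = 5, b->u32[i] = 3  ->  r->u32[i] = 1  (no borrow)
 *     a->u32[i] = 3, b->u32[i] = 5  ->  r->u32[i] = 0  (borrow)
 */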

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32) - 1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i + 4];                        \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX
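
/*
 * Editor's illustration (not part of the original source): each 1-5-5-5
 * source pixel expands to an 8-8-8-8 word with the alpha bit replicated
 * across a byte and the colour channels zero-extended, e.g.
 *
 *     e = 0xFC00 (a=1, r=0x1F, g=0, b=0)     ->  0xFF1F0000
 *     e = 0x001F (a=0, r=0,    g=0, b=0x1F)  ->  0x0000001F
 */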

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, element) {                              \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}
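
/*
 * Editor's note (not part of the original source): the carry into the
 * high doubleword is computed without a wider type.  a.lo + b.lo
 * overflows 64 bits exactly when b.lo > UINT64_MAX - a.lo, and
 * UINT64_MAX - a.lo == ~a.lo, hence the (~a.u64[LO_IDX] < b.u64[LO_IDX])
 * test.  E.g. a.lo = UINT64_MAX, b.lo = 1: ~a.lo = 0 < 1, so carry = 1.
 */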

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else
    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1 0xA
#define BCD_NEG_PREF 0xD
#define BCD_NEG_ALT 0xB
#define BCD_PLUS_ALT_2 0xE

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n / 2))
#else
#define BCD_DIG_BYTE(n) (n / 2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}
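
/*
 * Editor's illustration (not part of the original source): digit n of a
 * packed BCD quantity lives in the low nibble of byte BCD_DIG_BYTE(n)
 * when n is even and in the high nibble when n is odd; digit slot 0
 * holds the sign code.  On a big-endian host, +123 with ps=0 is laid
 * out as
 *
 *     u8[14] = 0x12   (digits 3 and 2)
 *     u8[15] = 0x3C   (digit 1, sign BCD_PLUS_PREF_1)
 *
 * with all other bytes zero.
 */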

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1 << CRF_SO;
    } else if (overflow) {
        cr |= 1 << CRF_SO;
    } else if (zero) {
        cr = 1 << CRF_EQ;
    }

    *r = result;

    return cr;
}
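
/*
 * Editor's note (not part of the original source): the returned value
 * is the CR field image.  For example, adding +123 and -023 takes the
 * bcd_cmp_mag(a, b) > 0 branch, yields +100 with no overflow, and
 * returns 1 << CRF_GT; adding two values whose magnitudes cancel
 * returns 1 << CRF_EQ instead.
 */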

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}

#undef ROTRu32
#undef EL_IDX

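/*
 * Editor's note (not part of the original source): the four rotate/shift
 * combinations above are the SHA-256 sigma functions; st selects the
 * "big" sigmas and each six bit picks the variant per element:
 *
 *     st=0, bit=0:  sigma0(x) = ROTR(x,7)  ^ ROTR(x,18) ^ (x >> 3)
 *     st=0, bit=1:  sigma1(x) = ROTR(x,17) ^ ROTR(x,19) ^ (x >> 10)
 *     st=1, bit=0:  Sigma0(x) = ROTR(x,2)  ^ ROTR(x,13) ^ ROTR(x,22)
 *     st=1, bit=1:  Sigma1(x) = ROTR(x,6)  ^ ROTR(x,11) ^ ROTR(x,25)
 */
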
#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
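
/*
 * Editor's illustration (not part of the original source): brinc is the
 * reverse-carry increment used for bit-reversed (FFT) addressing: the
 * masked index is bit-reversed, incremented, and reversed back.  With
 * the low four bits selected (b = 0xF), repeated calls starting from
 * a = 0 step through 0x0, 0x8, 0x4, 0xC, ..., i.e. 4-bit indices in
 * bit-reversed order.
 */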

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
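
/*
 * Editor's illustration (not part of the original source): dlmzb scans
 * the eight bytes of high:low most-significant byte first and records
 * the 1-based position of the first zero byte in XER (clamped to 8 when
 * no zero byte is found).  E.g. high = 0x41424300 ("ABC\0") stops in
 * the high word with i = 4 and, with update_Rc set, CR0 = 0b0100 | SO.
 */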