/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

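/*
 * e.g. hasvalue(x, 0xab): pattern(0xab) replicates 0xab into every byte,
 * the XOR zeroes exactly the matching bytes, and haszero() then leaves
 * 0x80 at each byte position that was zero.
 */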
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/* Return an invalid random number.
 *
 * FIXME: Add an RNG backend or other mechanism to get a cryptographically
 * suitable random number.
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

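/*
 * bpermd (below) gathers bits: each of the eight bytes of rs selects a bit
 * position in rb, and the selected bits are collected into the low byte of
 * the result.  Indices >= 64 contribute a zero bit.
 */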
#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

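/*
 * Note: sraw above and srad below set CA (and CA32) only when the source
 * is negative and one-bits are shifted out; shifts of non-negative values
 * always clear the carry.
 */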
#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

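/*
 * The popcnt helpers below use the classic parallel bit count: adjacent
 * bit pairs are summed, then nibbles, then bytes (and halfwords/words for
 * popcntw), leaving each byte or word holding its own population count.
 */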
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words. */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

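/*
 * All four POWER divide helpers leave the remainder in the MQ register:
 * helper_div/helper_divo divide the 64-bit quantity (arg1 << 32) | MQ,
 * while helper_divs/helper_divso divide arg1 alone.  The "o" forms also
 * update the OV and SO flags.
 */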
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10 ** (-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

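/*
 * e.g. SATCVT(sd, sw, ...) defines cvtsdsw(), which converts int64_t to
 * int32_t: cvtsdsw(INT64_MAX, &sat) returns INT32_MAX and sets *sat, while
 * in-range values pass through unchanged and leave *sat alone.
 */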
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

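/*
 * lvsl/lvsr generate the permute control vector used with vperm for
 * unaligned accesses: lvsl produces the byte sequence sh..sh+15, lvsr the
 * sequence (16-sh)..(31-sh).
 */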
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

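/*
 * vaddcuw returns the carry out of each 32-bit add: a + b overflows
 * exactly when b > ~a (i.e. b > UINT32_MAX - a), so the carry can be
 * computed without widening.
 */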
/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->u64[LO_IDX] = res & 1;
    r->u64[HI_IDX] = 0;
}

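/*
 * The vprtyb* folds above stop at byte granularity, so taking bit 0 of
 * the result yields the parity of the least-significant bits of all the
 * bytes in each element, which is how the instructions are defined.
 */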
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

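/*
 * The "+ 1" in VAVG_DO makes vavg* round halves upward, and the widened
 * etype keeps the intermediate sum from overflowing the element type.
 */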
#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

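/*
 * For the record (".") forms above, CR field 6 is set to 0b1000 when the
 * predicate holds for all elements and to 0b0010 when it holds for none.
 */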
#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                           ppc_avr_t *a, ppc_avr_t *b)                  \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                      || (b->element[i] == 0)                           \
                      || (a->element[i] != b->element[i]) ?             \
                      ones : 0x0);                                      \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

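/*
 * vmhraddshs differs from vmhaddshs only in the 0x00004000 addend, which
 * rounds the product before its high half is taken by the ">> 15".
 */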
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i * 2 + HI_IDX] = a->element[i];         \
                result.element[i * 2 + LO_IDX] = b->element[i];         \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)           \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)   \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

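/*
 * In vperm each control byte of c selects one byte of the 32-byte
 * concatenation of a and b: the low four bits give the index and bit 4
 * selects which source register supplies it.
 */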
void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

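/*
 * Lookup table for vgbbd (Vector Gather Bits by Bytes by Doubleword)
 * below: entry n has the MSB of byte k set exactly when bit k of n is set.
 * helper_vgbbd ORs these entries together, shifted by the source byte's
 * position, which amounts to an 8x8 bit-matrix transpose of each
 * doubleword.
 */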
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

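/*
 * The vpmsum* helpers implement carry-less (GF(2) polynomial)
 * multiplication: the shift-and-XOR loop is schoolbook multiplication
 * with XOR in place of addition, and adjacent products are then
 * XOR-summed in pairs.
 */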
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64 - j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

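/*
 * vpkpx packs each 32-bit pixel down to a 1:5:5:5 halfword, keeping the
 * low bit of the most-significant byte and the top five bits of each of
 * the three colour channels.
 */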
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i;                                                              \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        uint##size##_t src1 = a->element[i];                            \
        uint##size##_t src2 = b->element[i];                            \
        uint##size##_t src3 = r->element[i];                            \
        uint##size##_t begin, end, shift, mask, rot_val;                \
                                                                        \
        shift = extract##size(src2, 0, 6);                              \
        end   = extract##size(src2, 8, 6);                              \
        begin = extract##size(src2, 16, 6);                             \
        rot_val = rol##size(src1, shift);                               \
        mask = mask_u##size(begin, end);                                \
        if (insert) {                                                   \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);          \
        } else {                                                        \
            r->element[i] = (rot_val & mask);                           \
        }                                                               \
    }                                                                   \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

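/*
 * The insert forms (vrldmi/vrlwmi) merge the rotated value into the
 * existing target under the begin/end mask; the mask forms
 * (vrldnm/vrlwnm) just AND the rotated value with the mask.
 */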
void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                                \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
    {                                                               \
        int index;                                                  \
        if (left) {                                                 \
            index = (a & 0xf) * 8;                                  \
        } else {                                                    \
            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
        }                                                           \
        return int128_getlo(int128_rshift(b->s128, index)) &        \
            MAKE_64BIT_MASK(0, size);                               \
    }
#else
#define VEXTU_X_DO(name, size, left)                                \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
    {                                                               \
        int index;                                                  \
        if (left) {                                                 \
            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
        } else {                                                    \
            index = (a & 0xf) * 8;                                  \
        }                                                           \
        return int128_getlo(int128_rshift(b->s128, index)) &        \
            MAKE_64BIT_MASK(0, size);                               \
    }
#endif

VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are,
 * to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX * 15] & 0x7;                           \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;     /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as the destination and source registers can be
     * the same.  The register is modified in place, so iterating in
     * reverse guarantees that a computed result is never fed back in.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;               /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                              /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                            \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                     \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                  \
        int i;                                                            \
                                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
            r->element[i] = s;                                            \
        }                                                                 \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
                sizeof(r->element[0]));                                     \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
                        target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    getVSR(xbn, &xb, env);
    memset(&xt, 0, sizeof(xt));

#if defined(HOST_WORDS_BIGENDIAN)
    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        xt.u8[8 - es + i] = xb.u8[ext_index % 16];
    }
#else
    ext_index = 15 - index;
    for (i = es - 1; i >= 0; i--, ext_index--) {
        xt.u8[8 + i] = xb.u8[ext_index % 16];
    }
#endif

    putVSR(xtn, &xt, env);
}

void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
                      target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    getVSR(xbn, &xb, env);
    getVSR(xtn, &xt, env);

#if defined(HOST_WORDS_BIGENDIAN)
    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        xt.u8[ins_index] = xb.u8[8 - es + i];
    }
#else
    ins_index = 15 - index;
    for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
        xt.u8[ins_index] = xb.u8[8 + i];
    }
#endif

    putVSR(xtn, &xt, env);
}

#define VEXT_SIGNED(name, element, mask, cast, recast)              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
#undef VEXT_SIGNED

#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI
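
/*
 * Illustrative note: (int8_t)(splat << 3) >> 3 sign-extends the 5-bit
 * SIMM field, e.g. splat = 0x1F becomes (int8_t)0xF8 >> 3 = -1, while
 * splat = 0x0F stays 15.
 */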

#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}
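
/*
 * Illustrative note: each element is 1 exactly when a - b would not
 * borrow, i.e. it is the carry-out of a + ~b + 1 computed 32 bits wide.
 */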

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32) - 1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                        \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)          \
    {                                                             \
        int i;                                                    \
        ppc_avr_t result;                                         \
                                                                  \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                \
            uint16_t e = b->u16[hi ? i : i + 4];                  \
            uint8_t a = (e >> 15) ? 0xff : 0;                     \
            uint8_t r = (e >> 10) & 0x1f;                         \
            uint8_t g = (e >> 5) & 0x1f;                          \
            uint8_t b = e & 0x1f;                                 \
                                                                  \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                         \
        *r = result;                                              \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)             \
    {                                                           \
        int i;                                                  \
                                                                \
        VECTOR_FOR_INORDER_I(i, element) {                      \
            r->element[i] = name(b->element[i]);                \
        }                                                       \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}
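
/*
 * Illustrative note: ~a.lo < b.lo is exactly the carry-out of a.lo + b.lo,
 * since the 64-bit sum overflows iff b.lo > UINT64_MAX - a.lo = ~a.lo.
 * E.g. a.lo = UINT64_MAX, b.lo = 1 gives ~a.lo = 0 < 1, so carry is 1.
 */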

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}
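
/*
 * Illustrative layout note: nibble 0 (the least significant nibble of the
 * 128-bit value) holds the sign code and digits 1..31 follow upward in
 * significance, so +123 is encoded as ...0000123C with BCD_PLUS_PREF_1
 * as the sign nibble.
 */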

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}
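
/*
 * Illustrative example: adding +123 and -100 takes the unequal-sign path;
 * bcd_cmp_mag(a, b) > 0, so the result carries the sign of a with
 * magnitude 123 - 100 = 23, i.e. +23 with cr = CRF_GT.
 */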

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->s64[HI_IDX] < 0) {
        lo_value = -b->s64[LO_IDX];
        hi_value = ~b->u64[HI_IDX] + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->u64[LO_IDX];
        hi_value = b->u64[HI_IDX];
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}
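
/*
 * Illustrative reading (assuming divu128 leaves the quotient in lo_value
 * and the remainder in hi_value): the 128-bit magnitude is split by 10^15;
 * the remainder supplies digits 1..15 and the quotient digits 16..31.
 * For the input 123 that is quotient 0, remainder 123, giving ...123 above
 * the sign nibble.
 */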

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->s64[LO_IDX] = -lo_value;
        r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
    } else {
        r->s64[LO_IDX] = lo_value;
        r->s64[HI_IDX] = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.u64[LO_IDX] &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
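
/*
 * Illustrative note: the shift count is a signed digit count, so each unit
 * moves the magnitude by one nibble; i = 2 shifts left by 8 bits and
 * multiplies the decimal magnitude by 100, while i = -2 divides it by 100.
 */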

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i <= -32) {
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.u64[LO_IDX] &= ~0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.u64[HI_IDX] & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.u64[HI_IDX] &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.u64[LO_IDX] &= mask;
        ret.u64[HI_IDX] = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.u64[HI_IDX] & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.u64[HI_IDX] &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.u64[LO_IDX] &= mask;
        ret.u64[HI_IDX] = 0;
    } else if (i == 0) {
        if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
            ox_flag = CRF_SO;
        }
        ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
    }

    *r = ret;
    if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}
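
/*
 * Illustrative note: the Te tables fold SubBytes, ShiftRows and MixColumns
 * into four 256-entry lookups per column, so one vcipher round is the XOR
 * of the four table values with the round key supplied in b.
 */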

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}
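
/*
 * Illustrative mapping to FIPS 180-4 for SHA-256: st = 0 selects the
 * message-schedule functions (sigma0 with the six bit clear, sigma1 with
 * it set) and st = 1 selects the compression functions (Sigma0/Sigma1),
 * one choice per 32-bit element.
 */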

#undef ROTRu32
#undef EL_IDX

#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
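
/*
 * Illustrative example: brinc implements the bit-reversed increment used
 * for FFT addressing.  With arg2 = 0xFFFF and arg1 = 0, a | ~b is
 * 0xFFFF0000, word_reverse gives 0x0000FFFF, adding 1 gives 0x00010000,
 * and reversing again yields 0x8000 -- the bit-reversed successor of
 * index 0 under a 16-bit mask.
 */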

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}