]> git.proxmox.com Git - mirror_qemu.git/blame - target/ppc/int_helper.c
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
[mirror_qemu.git] / target / ppc / int_helper.c
CommitLineData
64654ded
BS
1/*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
6bd039cd 9 * version 2.1 of the License, or (at your option) any later version.
64654ded
BS
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
db725815 19
0d75590d 20#include "qemu/osdep.h"
64654ded 21#include "cpu.h"
3e00884f 22#include "internal.h"
1de7afc9 23#include "qemu/host-utils.h"
db725815 24#include "qemu/main-loop.h"
8a05fd9a 25#include "qemu/log.h"
2ef6175a 26#include "exec/helper-proto.h"
6f2945cd 27#include "crypto/aes.h"
24f91e81 28#include "fpu/softfloat.h"
3f74b632
RH
29#include "qapi/error.h"
30#include "qemu/guest-random.h"
1015fcab 31#include "tcg/tcg-gvec-desc.h"
64654ded
BS
32
33#include "helper_regs.h"
34/*****************************************************************************/
35/* Fixed point operations helpers */
64654ded 36
f32899de
ND
37static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38{
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
43 }
44}
45
6a4fda33
TM
46target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48{
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
f32899de 67 helper_update_ov_legacy(env, overflow);
6a4fda33
TM
68 }
69
70 return (target_ulong)rt;
71}
72
a98eb9e9
TM
73target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75{
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
f32899de 95 helper_update_ov_legacy(env, overflow);
a98eb9e9
TM
96 }
97
98 return (target_ulong)rt;
99}
100
98d1eb27
TM
101#if defined(TARGET_PPC64)
102
103uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104{
105 uint64_t rt = 0;
106 int overflow = 0;
107
9276a31c
LP
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
98d1eb27 110 rt = 0; /* Undefined */
9276a31c
LP
111 } else {
112 divu128(&rt, &ra, rb);
98d1eb27
TM
113 }
114
115 if (oe) {
f32899de 116 helper_update_ov_legacy(env, overflow);
98d1eb27
TM
117 }
118
119 return rt;
120}
121
e44259b6
TM
122uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123{
40f3e79a 124 uint64_t rt = 0;
e44259b6
TM
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
9276a31c 127 int overflow = 0;
e44259b6 128
9276a31c
LP
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
e44259b6 131 rt = 0; /* Undefined */
9276a31c
LP
132 } else {
133 divs128(&rt, &ra, rb);
e44259b6
TM
134 }
135
136 if (oe) {
f32899de 137 helper_update_ov_legacy(env, overflow);
e44259b6
TM
138 }
139
140 return rt;
141}
142
98d1eb27
TM
143#endif
144
145
64654ded 146#if defined(TARGET_PPC64)
082ce330
ND
147/* if x = 0xab, returns 0xababababababababa */
148#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
b6cb41b2
DG
150/*
151 * subtract 1 from each byte, and with inverse, check if MSB is set at each
082ce330
ND
152 * byte.
153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 */
156#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158/* When you XOR the pattern and there is a match, that byte will be zero */
159#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160
161uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162{
efa73196 163 return hasvalue(rb, ra) ? CRF_GT : 0;
082ce330
ND
164}
165
166#undef pattern
167#undef haszero
168#undef hasvalue
169
b6cb41b2 170/*
3f74b632 171 * Return a random number.
fec5c62a 172 */
3f74b632 173uint64_t helper_darn32(void)
fec5c62a 174{
3f74b632
RH
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
fec5c62a 184
3f74b632 185 return ret;
fec5c62a
RB
186}
187
3f74b632
RH
188uint64_t helper_darn64(void)
189{
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
64654ded 199
3f74b632
RH
200 return ret;
201}
86ba37ed
TM
202
/*
 * Bit permute doubleword.
 *
 * Each of the eight bytes of @rs is an index.  For byte number n (counted
 * from the least significant byte), result bit n is set when the index is
 * below 64 and the bit PPC_BIT(index) of @rb is set.  All other result bits
 * are zero.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t result = 0;

    for (int byte = 0; byte < 8; byte++) {
        const int index = (rs >> (byte * 8)) & 0xFF;

        if (index < 64 && (rb & PPC_BIT(index))) {
            result |= 1 << byte;
        }
    }

    return result;
}
218
219#endif
220
fcfda20f
AJ
221target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222{
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234}
235
64654ded 236/* shift right arithmetic helper */
d15f74fb
BS
237target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
64654ded
BS
239{
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
af1c259f 247 env->ca32 = env->ca = 0;
64654ded 248 } else {
af1c259f 249 env->ca32 = env->ca = 1;
64654ded
BS
250 }
251 } else {
252 ret = (int32_t)value;
af1c259f 253 env->ca32 = env->ca = 0;
64654ded
BS
254 }
255 } else {
256 ret = (int32_t)value >> 31;
af1c259f 257 env->ca32 = env->ca = (ret != 0);
64654ded
BS
258 }
259 return (target_long)ret;
260}
261
262#if defined(TARGET_PPC64)
d15f74fb
BS
263target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
64654ded
BS
265{
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
4bc02e23 272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
af1c259f 273 env->ca32 = env->ca = 0;
64654ded 274 } else {
af1c259f 275 env->ca32 = env->ca = 1;
64654ded
BS
276 }
277 } else {
278 ret = (int64_t)value;
af1c259f 279 env->ca32 = env->ca = 0;
64654ded
BS
280 }
281 } else {
282 ret = (int64_t)value >> 63;
af1c259f 283 env->ca32 = env->ca = (ret != 0);
64654ded
BS
284 }
285 return ret;
286}
287#endif
288
289#if defined(TARGET_PPC64)
290target_ulong helper_popcntb(target_ulong val)
291{
79770002 292 /* Note that we don't fold past bytes */
64654ded
BS
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300}
301
302target_ulong helper_popcntw(target_ulong val)
303{
79770002 304 /* Note that we don't fold past words. */
64654ded
BS
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316}
64654ded
BS
317#else
318target_ulong helper_popcntb(target_ulong val)
319{
79770002 320 /* Note that we don't fold past bytes */
64654ded
BS
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325}
64654ded
BS
326#endif
327
/*
 * Centrifuge doubleword.
 *
 * The bits of @src whose corresponding @mask bit is 1 are gathered, in
 * order, at the low end of the result; the bits whose mask bit is 0 are
 * gathered, in order, at the high end.
 */
uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    /* 64-bit accumulators: target_ulong can be narrower than the operands. */
    uint64_t m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         *
         * The shift is unsigned because n can be 63, and "1ll << 63" is
         * undefined.
         */
        m = (1ULL << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
389
21ba6e58
MF
/*
 * Parallel bits deposit doubleword.
 *
 * Walks the set bits of @mask from least to most significant and places
 * successive low-order bits of @src at those positions.  Result bits where
 * @mask is 0 are zero.
 */
uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    uint64_t deposited = 0;

    if (mask == -1) {
        return src;
    }

    while (mask != 0) {
        const int pos = ctz64(mask);

        deposited |= (src & 1) << pos;
        src >>= 1;          /* expose the next source bit */
        mask &= mask - 1;   /* drop the lowest set mask bit */
    }

    return deposited;
}
8bdb7606
MF
407
/*
 * Parallel bits extract doubleword.
 *
 * Walks the set bits of @mask from least to most significant and packs the
 * bits of @src found at those positions into the low end of the result.
 */
uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    uint64_t packed = 0;
    int out = 0;

    if (mask == -1) {
        return src;
    }

    while (mask != 0) {
        const int in = ctz64(mask);

        packed |= ((src >> in) & 1) << out;
        out++;
        mask &= mask - 1;   /* drop the lowest set mask bit */
    }

    return packed;
}
21ba6e58 425
64654ded
BS
426/*****************************************************************************/
427/* Altivec extension helpers */
/*
 * Loop header that walks @index over every lane of r->@element: ascending
 * on big-endian hosts, descending on little-endian hosts.
 *
 * A variable named 'r' must be in scope where the macro is expanded, and
 * @index must be a signed integer, since the descending form ends when it
 * drops below zero.
 */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element)                            \
    for ((index) = 0; (index) < ARRAY_SIZE(r->element); (index)++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                            \
    for ((index) = ARRAY_SIZE(r->element) - 1; (index) >= 0; (index)--)
#endif
435
64654ded
BS
436/* Saturating arithmetic helpers. */
/*
 * SATCVT generates cvt<from><to>(): clamp x to [min, max] and convert it to
 * to_type.  *sat is set to 1 when clamping happened and is left untouched
 * otherwise.
 */
#define SATCVT(from, to, from_type, to_type, min, max)                  \
    static inline to_type cvt##from##to(from_type x, int *sat)          \
    {                                                                   \
        if (x < (from_type)min) {                                       \
            *sat = 1;                                                   \
            return min;                                                 \
        }                                                               \
        if (x > (from_type)max) {                                       \
            *sat = 1;                                                   \
            return max;                                                 \
        }                                                               \
        return x;                                                       \
    }

/*
 * SATCVTU is the variant for unsigned sources: only the upper bound is
 * checked, and the min argument is unused.
 */
#define SATCVTU(from, to, from_type, to_type, min, max)                 \
    static inline to_type cvt##from##to(from_type x, int *sat)          \
    {                                                                   \
        if (x > (from_type)max) {                                       \
            *sat = 1;                                                   \
            return max;                                                 \
        }                                                               \
        return x;                                                       \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
478
/* Write @vscr to the vector status and control register of @env. */
void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}
483
cc2b90d7
RH
/* Return the vector status and control register value of @env. */
uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}
488
6175f5a0
RH
/*
 * Record that a saturation occurred by storing a non-zero value in the
 * first word of env->vscr_sat.
 */
static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary. */
    env->vscr_sat.u32[0] = 1;
}
494
64654ded
BS
495void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496{
497 int i;
498
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
501 }
502}
503
5c69452c
AK
504/* vprtybw */
505void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506{
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
512 }
513}
514
515/* vprtybd */
516void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517{
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
524 }
525}
526
527/* vprtybq */
528void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529{
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
3c385a93
MCA
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
5c69452c
AK
536}
537
64654ded 538#define VARITHFP(suffix, func) \
d15f74fb
BS
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
64654ded
BS
541 { \
542 int i; \
543 \
05ee3e8a
MCA
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
64654ded
BS
546 } \
547 }
548VARITHFP(addfp, float32_add)
549VARITHFP(subfp, float32_sub)
db1babb8
AJ
550VARITHFP(minfp, float32_min)
551VARITHFP(maxfp, float32_max)
64654ded
BS
552#undef VARITHFP
553
2f93c23f
AJ
554#define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
557 { \
558 int i; \
05ee3e8a
MCA
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
2f93c23f
AJ
562 } \
563 }
564VARITHFPFMA(maddfp, 0);
565VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566#undef VARITHFPFMA
567
64654ded
BS
568#define VARITHSAT_CASE(type, op, cvt, element) \
569 { \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
572 }
573
574#define VARITHSAT_DO(name, op, optype, cvt, element) \
fb11ae7d
RH
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
64654ded
BS
577 { \
578 int sat = 0; \
579 int i; \
580 \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
fb11ae7d 582 VARITHSAT_CASE(optype, op, cvt, element); \
64654ded
BS
583 } \
584 if (sat) { \
fb11ae7d 585 vscr_sat->u32[0] = 1; \
64654ded
BS
586 } \
587 }
588#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600#undef VARITHSAT_CASE
601#undef VARITHSAT_DO
602#undef VARITHSAT_SIGNED
603#undef VARITHSAT_UNSIGNED
604
605#define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 { \
608 int i; \
609 \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
613 } \
614 }
615
616#define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620VAVG(b, s8, int16_t, u8, uint16_t)
621VAVG(h, s16, int32_t, u16, uint32_t)
622VAVG(w, s32, int64_t, u32, uint64_t)
623#undef VAVG_DO
624#undef VAVG
625
37707059
SD
626#define VABSDU_DO(name, element) \
627void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628{ \
629 int i; \
630 \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
635 } \
636}
637
b6cb41b2
DG
638/*
639 * VABSDU - Vector absolute difference unsigned
37707059
SD
640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
642 */
643#define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645VABSDU(b, u8)
646VABSDU(h, u16)
647VABSDU(w, u32)
648#undef VABSDU_DO
649#undef VABSDU
650
64654ded 651#define VCF(suffix, cvt, element) \
d15f74fb
BS
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
64654ded
BS
654 { \
655 int i; \
656 \
05ee3e8a 657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
64654ded 658 float32 t = cvt(b->element[i], &env->vec_status); \
05ee3e8a 659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
64654ded
BS
660 } \
661 }
662VCF(ux, uint32_to_float32, u32)
663VCF(sx, int32_to_float32, s32)
664#undef VCF
665
eb936dc0
MF
666#define VCMPNEZ(NAME, ELEM) \
667void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668{ \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
672 } \
f7cc8466 673}
eb936dc0
MF
674VCMPNEZ(VCMPNEZB, u8)
675VCMPNEZ(VCMPNEZH, u16)
676VCMPNEZ(VCMPNEZW, u32)
677#undef VCMPNEZ
f7cc8466 678
64654ded 679#define VCMPFP_DO(suffix, compare, order, record) \
d15f74fb
BS
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
682 { \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
687 \
05ee3e8a 688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
64654ded 689 uint32_t result; \
71bfd65c
RH
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
64654ded
BS
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
699 } \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
703 } \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
706 } \
707 }
708#define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
711VCMPFP(eqfp, ==, float_relation_equal)
712VCMPFP(gefp, !=, float_relation_less)
713VCMPFP(gtfp, ==, float_relation_greater)
714#undef VCMPFP_DO
715#undef VCMPFP
716
d15f74fb
BS
717static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
64654ded
BS
719{
720 int i;
721 int all_in = 0;
722
05ee3e8a 723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
71bfd65c
RH
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
64654ded
BS
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
4007b8de 728 all_in = 1;
64654ded 729 } else {
05ee3e8a 730 float32 bneg = float32_chs(b->f32[i]);
71bfd65c
RH
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
64654ded
BS
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
735
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
738 }
739 }
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
742 }
743}
744
/* Vector compare bounds, float32, without updating env->crf[6]. */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}
749
d15f74fb
BS
/* Vector compare bounds, float32, recording the outcome in env->crf[6]. */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
755
756#define VCT(suffix, satcvt, element) \
d15f74fb
BS
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
64654ded
BS
759 { \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
763 \
764 set_float_rounding_mode(float_round_to_zero, &s); \
05ee3e8a
MCA
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
64654ded
BS
767 r->element[i] = 0; \
768 } else { \
05ee3e8a 769 float64 t = float32_to_float64(b->f32[i], &s); \
64654ded
BS
770 int64_t j; \
771 \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
775 } \
776 } \
777 if (sat) { \
6175f5a0 778 set_vscr_sat(env); \
64654ded
BS
779 } \
780 }
781VCT(uxs, cvtsduw, u32)
782VCT(sxs, cvtsdsw, s32)
783#undef VCT
784
4879538c
RS
785target_ulong helper_vclzlsbb(ppc_avr_t *r)
786{
787 target_ulong count = 0;
788 int i;
60594fea
MCA
789 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
790 if (r->VsrB(i) & 0x01) {
4879538c
RS
791 break;
792 }
793 count++;
794 }
795 return count;
796}
797
798target_ulong helper_vctzlsbb(ppc_avr_t *r)
799{
800 target_ulong count = 0;
801 int i;
4879538c 802 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
60594fea 803 if (r->VsrB(i) & 0x01) {
4879538c
RS
804 break;
805 }
806 count++;
807 }
808 return count;
809}
810
d15f74fb
BS
811void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
812 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
813{
814 int sat = 0;
815 int i;
816
817 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
818 int32_t prod = a->s16[i] * b->s16[i];
819 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
820
821 r->s16[i] = cvtswsh(t, &sat);
822 }
823
824 if (sat) {
6175f5a0 825 set_vscr_sat(env);
64654ded
BS
826 }
827}
828
d15f74fb
BS
829void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
830 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
831{
832 int sat = 0;
833 int i;
834
835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
836 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
837 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
838 r->s16[i] = cvtswsh(t, &sat);
839 }
840
841 if (sat) {
6175f5a0 842 set_vscr_sat(env);
64654ded
BS
843 }
844}
845
64654ded
BS
846void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
847{
848 int i;
849
850 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
851 int32_t prod = a->s16[i] * b->s16[i];
852 r->s16[i] = (int16_t) (prod + c->s16[i]);
853 }
854}
855
d81c2040
MCA
856#define VMRG_DO(name, element, access, ofs) \
857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
858 { \
859 ppc_avr_t result; \
860 int i, half = ARRAY_SIZE(r->element) / 2; \
861 \
862 for (i = 0; i < half; i++) { \
863 result.access(i * 2 + 0) = a->access(i + ofs); \
864 result.access(i * 2 + 1) = b->access(i + ofs); \
865 } \
866 *r = result; \
867 }
868
869#define VMRG(suffix, element, access) \
870 VMRG_DO(mrgl##suffix, element, access, half) \
871 VMRG_DO(mrgh##suffix, element, access, 0)
872VMRG(b, u8, VsrB)
873VMRG(h, u16, VsrH)
874VMRG(w, u32, VsrW)
64654ded
BS
875#undef VMRG_DO
876#undef VMRG
64654ded 877
d15f74fb
BS
878void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
880{
881 int32_t prod[16];
882 int i;
883
884 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
885 prod[i] = (int32_t)a->s8[i] * b->u8[i];
886 }
887
888 VECTOR_FOR_INORDER_I(i, s32) {
889 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
890 prod[4 * i + 2] + prod[4 * i + 3];
891 }
892}
893
d15f74fb
BS
894void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
896{
897 int32_t prod[8];
898 int i;
899
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 prod[i] = a->s16[i] * b->s16[i];
902 }
903
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
906 }
907}
908
d15f74fb
BS
909void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
911{
912 int32_t prod[8];
913 int i;
914 int sat = 0;
915
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = (int32_t)a->s16[i] * b->s16[i];
918 }
919
920 VECTOR_FOR_INORDER_I(i, s32) {
921 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922
923 r->u32[i] = cvtsdsw(t, &sat);
924 }
925
926 if (sat) {
6175f5a0 927 set_vscr_sat(env);
64654ded
BS
928 }
929}
930
d15f74fb
BS
931void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
932 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
933{
934 uint16_t prod[16];
935 int i;
936
937 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
938 prod[i] = a->u8[i] * b->u8[i];
939 }
940
941 VECTOR_FOR_INORDER_I(i, u32) {
942 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
943 prod[4 * i + 2] + prod[4 * i + 3];
944 }
945}
946
d15f74fb
BS
947void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
949{
950 uint32_t prod[8];
951 int i;
952
953 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
954 prod[i] = a->u16[i] * b->u16[i];
955 }
956
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
959 }
960}
961
d15f74fb
BS
962void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
963 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
964{
965 uint32_t prod[8];
966 int i;
967 int sat = 0;
968
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
971 }
972
973 VECTOR_FOR_INORDER_I(i, s32) {
974 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975
976 r->u32[i] = cvtuduw(t, &sat);
977 }
978
979 if (sat) {
6175f5a0 980 set_vscr_sat(env);
64654ded
BS
981 }
982}
983
4fbc89ed 984#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
80eca687 985 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
986 { \
987 int i; \
988 \
4fbc89ed
MCA
989 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
990 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
991 (cast)b->mul_access(i); \
992 } \
993 }
994
995#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
80eca687 996 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
4fbc89ed
MCA
997 { \
998 int i; \
999 \
1000 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1001 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1002 (cast)b->mul_access(i + 1); \
64654ded
BS
1003 } \
1004 }
4fbc89ed
MCA
1005
1006#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
80eca687
LMC
1007 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1008 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1009VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1010VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1011VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1012VMUL(UB, u8, VsrB, VsrH, uint16_t)
1013VMUL(UH, u16, VsrH, VsrW, uint32_t)
1014VMUL(UW, u32, VsrW, VsrD, uint64_t)
4fbc89ed
MCA
1015#undef VMUL_DO_EVN
1016#undef VMUL_DO_ODD
64654ded
BS
1017#undef VMUL
1018
41c2877f
MF
1019void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1020 target_ulong uim)
1021{
1022 int i, idx;
1023 ppc_vsr_t tmp = { .u64 = {0, 0} };
1024
1025 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1026 if ((pcv->VsrB(i) >> 5) == uim) {
1027 idx = pcv->VsrB(i) & 0x1f;
1028 if (idx < ARRAY_SIZE(t->u8)) {
1029 tmp.VsrB(i) = s0->VsrB(idx);
1030 } else {
1031 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1032 }
1033 }
1034 }
1035
1036 *t = tmp;
1037}
1038
28347fe2 1039void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1040{
1041 ppc_avr_t result;
1042 int i;
1043
60594fea
MCA
1044 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1045 int s = c->VsrB(i) & 0x1f;
64654ded 1046 int index = s & 0xf;
64654ded
BS
1047
1048 if (s & 0x10) {
60594fea 1049 result.VsrB(i) = b->VsrB(index);
64654ded 1050 } else {
60594fea 1051 result.VsrB(i) = a->VsrB(index);
64654ded
BS
1052 }
1053 }
1054 *r = result;
1055}
1056
28347fe2 1057void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
ab045436
RS
1058{
1059 ppc_avr_t result;
1060 int i;
1061
60594fea
MCA
1062 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1063 int s = c->VsrB(i) & 0x1f;
ab045436 1064 int index = 15 - (s & 0xf);
ab045436
RS
1065
1066 if (s & 0x10) {
60594fea 1067 result.VsrB(i) = a->VsrB(index);
ab045436 1068 } else {
60594fea 1069 result.VsrB(i) = b->VsrB(index);
ab045436
RS
1070 }
1071 }
1072 *r = result;
1073}
1074
618574dd 1075#define XXGENPCV_BE_EXP(NAME, SZ) \
b090f4f1
MF
1076void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1077{ \
1078 ppc_vsr_t tmp; \
1079 \
1080 /* Initialize tmp with the result of an all-zeros mask */ \
1081 tmp.VsrD(0) = 0x1011121314151617; \
1082 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1083 \
1084 /* Iterate over the most significant byte of each element */ \
1085 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1086 if (b->VsrB(i) & 0x80) { \
1087 /* Update each byte of the element */ \
1088 for (int k = 0; k < SZ; k++) { \
1089 tmp.VsrB(i + k) = j + k; \
1090 } \
1091 j += SZ; \
1092 } \
1093 } \
1094 \
1095 *t = tmp; \
618574dd
MF
1096}
1097
1098#define XXGENPCV_BE_COMP(NAME, SZ) \
b090f4f1
MF
1099void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1100{ \
1101 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1102 \
1103 /* Iterate over the most significant byte of each element */ \
1104 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1105 if (b->VsrB(i) & 0x80) { \
1106 /* Update each byte of the element */ \
1107 for (int k = 0; k < SZ; k++) { \
1108 tmp.VsrB(j + k) = i + k; \
1109 } \
1110 j += SZ; \
1111 } \
1112 } \
1113 \
1114 *t = tmp; \
618574dd
MF
1115}
1116
/*
 * Generate helper_<NAME>_le_exp: little-endian "expansion" permute control
 * vector. Elements of b are visited from the highest VsrB offset down to
 * the lowest, and the bytes inside each selected element are numbered in
 * reverse.
 */
#define XXGENPCV_LE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
    const int nbytes = ARRAY_SIZE(b->u8); \
    ppc_vsr_t pcv; \
    int next = 0; \
    \
    /* Value produced when no element of the mask is selected */ \
    pcv.VsrD(0) = 0x1F1E1D1C1B1A1918; \
    pcv.VsrD(1) = 0x1716151413121110; \
    \
    for (int first = nbytes - SZ; first >= 0; first -= SZ) { \
        if (!(b->VsrB(first) & 0x80)) { \
            continue; \
        } \
        /* Last byte of the element gets the smallest index */ \
        for (int off = SZ - 1; off >= 0; off--) { \
            pcv.VsrB(first + off) = next++; \
        } \
    } \
    \
    *t = pcv; \
}
1141
/*
 * Generate helper_<NAME>_le_comp: little-endian "compression" permute
 * control vector. Selected elements are packed starting from the highest
 * VsrB offsets of t, with byte order reversed inside each element; the
 * remainder of t is zero.
 */
#define XXGENPCV_LE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
    const int nbytes = ARRAY_SIZE(b->u8); \
    ppc_vsr_t pcv = { .u64 = { 0, 0 } }; \
    int dst = nbytes - SZ; /* base of the next destination element */ \
    \
    for (int i = 0; i < nbytes; i += SZ) { \
        /* Mask bit lives in the first byte of the mirrored element */ \
        if (!(b->VsrB(nbytes - i - SZ) & 0x80)) { \
            continue; \
        } \
        for (int off = SZ - 1; off >= 0; off--) { \
            pcv.VsrB(dst + off) = i + (SZ - 1 - off); \
        } \
        dst -= SZ; \
    } \
    \
    *t = pcv; \
}
1162
618574dd
MF
1163#define XXGENPCV(NAME, SZ) \
1164 XXGENPCV_BE_EXP(NAME, SZ) \
1165 XXGENPCV_BE_COMP(NAME, SZ) \
1166 XXGENPCV_LE_EXP(NAME, SZ) \
1167 XXGENPCV_LE_COMP(NAME, SZ) \
1168
b090f4f1
MF
1169XXGENPCV(XXGENPCVBM, 1)
1170XXGENPCV(XXGENPCVHM, 2)
1171XXGENPCV(XXGENPCVWM, 4)
1172XXGENPCV(XXGENPCVDM, 8)
618574dd
MF
1173
1174#undef XXGENPCV_BE_EXP
1175#undef XXGENPCV_BE_COMP
1176#undef XXGENPCV_LE_EXP
1177#undef XXGENPCV_LE_COMP
b090f4f1
MF
1178#undef XXGENPCV
1179
/*
 * Host-endianness accessors used by vbpermd/vbpermq.
 *
 * VBPERMQ_INDEX(avr, i)  - byte i of avr, counted from the most
 *                          significant byte of the vector.
 * VBPERMD_INDEX(i)       - host u64[] slot of doubleword i.
 * VBPERMQ_DW(index)      - host u64[] slot that holds bit `index` (0..127).
 * EXTRACT_BIT(avr, i, n) - bit n of doubleword i, where bit 0 is the most
 *                          significant bit.
 *
 * Only the u64[] slot depends on the host byte order. extract64() always
 * counts from the least significant bit, so the "63 - index" conversion is
 * required on big-endian and little-endian hosts alike.
 */
#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[(i)], 63 - (index), 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - (i))
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - (i)], 63 - (index), 1))
#endif
1192
01fe9a47
RS
1193void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1194{
1195 int i, j;
1196 ppc_avr_t result = { .u64 = { 0, 0 } };
1197 VECTOR_FOR_INORDER_I(i, u64) {
1198 for (j = 0; j < 8; j++) {
1199 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1200 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1201 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1202 }
1203 }
1204 }
1205 *r = result;
1206}
1207
4d82038e
TM
1208void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1209{
1210 int i;
1211 uint64_t perm = 0;
1212
1213 VECTOR_FOR_INORDER_I(i, u8) {
1214 int index = VBPERMQ_INDEX(b, i);
1215
1216 if (index < 128) {
b6cb41b2 1217 uint64_t mask = (1ull << (63 - (index & 0x3F)));
4d82038e
TM
1218 if (a->u64[VBPERMQ_DW(index)] & mask) {
1219 perm |= (0x8000 >> i);
1220 }
1221 }
1222 }
1223
3c385a93
MCA
1224 r->VsrD(0) = perm;
1225 r->VsrD(1) = 0;
4d82038e
TM
1226}
1227
1228#undef VBPERMQ_INDEX
1229#undef VBPERMQ_DW
1230
b8476fc7
TM
1231#define PMSUM(name, srcfld, trgfld, trgtyp) \
1232void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1233{ \
1234 int i, j; \
b6cb41b2 1235 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
b8476fc7
TM
1236 \
1237 VECTOR_FOR_INORDER_I(i, srcfld) { \
1238 prod[i] = 0; \
1239 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
b6cb41b2 1240 if (a->srcfld[i] & (1ull << j)) { \
b8476fc7
TM
1241 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1242 } \
1243 } \
1244 } \
1245 \
1246 VECTOR_FOR_INORDER_I(i, trgfld) { \
b6cb41b2 1247 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
b8476fc7
TM
1248 } \
1249}
1250
1251PMSUM(vpmsumb, u8, u16, uint16_t)
1252PMSUM(vpmsumh, u16, u32, uint32_t)
1253PMSUM(vpmsumw, u32, u64, uint64_t)
1254
1255void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1256{
1257
1258#ifdef CONFIG_INT128
1259 int i, j;
1260 __uint128_t prod[2];
1261
1262 VECTOR_FOR_INORDER_I(i, u64) {
1263 prod[i] = 0;
1264 for (j = 0; j < 64; j++) {
b6cb41b2 1265 if (a->u64[i] & (1ull << j)) {
b8476fc7
TM
1266 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1267 }
1268 }
1269 }
1270
1271 r->u128 = prod[0] ^ prod[1];
1272
1273#else
1274 int i, j;
1275 ppc_avr_t prod[2];
1276
1277 VECTOR_FOR_INORDER_I(i, u64) {
3c385a93 1278 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
b8476fc7 1279 for (j = 0; j < 64; j++) {
b6cb41b2 1280 if (a->u64[i] & (1ull << j)) {
b8476fc7
TM
1281 ppc_avr_t bshift;
1282 if (j == 0) {
3c385a93
MCA
1283 bshift.VsrD(0) = 0;
1284 bshift.VsrD(1) = b->u64[i];
b8476fc7 1285 } else {
3c385a93
MCA
1286 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1287 bshift.VsrD(1) = b->u64[i] << j;
b8476fc7 1288 }
3c385a93
MCA
1289 prod[i].VsrD(1) ^= bshift.VsrD(1);
1290 prod[i].VsrD(0) ^= bshift.VsrD(0);
b8476fc7
TM
1291 }
1292 }
1293 }
1294
3c385a93
MCA
1295 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1296 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
b8476fc7
TM
1297#endif
1298}
1299
1300
/* PKBIG: 1 when the host is big-endian, 0 otherwise (used by the VPK helpers) */
#if !HOST_BIG_ENDIAN
#define PKBIG 0
#else
#define PKBIG 1
#endif
1306void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1307{
1308 int i, j;
1309 ppc_avr_t result;
e03b5686 1310#if HOST_BIG_ENDIAN
64654ded
BS
1311 const ppc_avr_t *x[2] = { a, b };
1312#else
1313 const ppc_avr_t *x[2] = { b, a };
1314#endif
1315
1316 VECTOR_FOR_INORDER_I(i, u64) {
1317 VECTOR_FOR_INORDER_I(j, u32) {
1318 uint32_t e = x[i]->u32[j];
1319
b6cb41b2
DG
1320 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1321 ((e >> 6) & 0x3e0) |
1322 ((e >> 3) & 0x1f));
64654ded
BS
1323 }
1324 }
1325 *r = result;
1326}
1327
1328#define VPK(suffix, from, to, cvt, dosat) \
d15f74fb
BS
1329 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1330 ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
1331 { \
1332 int i; \
1333 int sat = 0; \
1334 ppc_avr_t result; \
1335 ppc_avr_t *a0 = PKBIG ? a : b; \
1336 ppc_avr_t *a1 = PKBIG ? b : a; \
1337 \
1338 VECTOR_FOR_INORDER_I(i, from) { \
1339 result.to[i] = cvt(a0->from[i], &sat); \
b6cb41b2 1340 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
64654ded
BS
1341 } \
1342 *r = result; \
1343 if (dosat && sat) { \
6175f5a0 1344 set_vscr_sat(env); \
64654ded
BS
1345 } \
1346 }
1347#define I(x, y) (x)
1348VPK(shss, s16, s8, cvtshsb, 1)
1349VPK(shus, s16, u8, cvtshub, 1)
1350VPK(swss, s32, s16, cvtswsh, 1)
1351VPK(swus, s32, u16, cvtswuh, 1)
024215b2
TM
1352VPK(sdss, s64, s32, cvtsdsw, 1)
1353VPK(sdus, s64, u32, cvtsduw, 1)
64654ded
BS
1354VPK(uhus, u16, u8, cvtuhub, 1)
1355VPK(uwus, u32, u16, cvtuwuh, 1)
024215b2 1356VPK(udus, u64, u32, cvtuduw, 1)
64654ded
BS
1357VPK(uhum, u16, u8, I, 0)
1358VPK(uwum, u32, u16, I, 0)
024215b2 1359VPK(udum, u64, u32, I, 0)
64654ded
BS
1360#undef I
1361#undef VPK
1362#undef PKBIG
1363
d15f74fb 1364void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1365{
1366 int i;
1367
05ee3e8a
MCA
1368 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1369 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
64654ded
BS
1370 }
1371}
1372
1373#define VRFI(suffix, rounding) \
d15f74fb
BS
1374 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1375 ppc_avr_t *b) \
64654ded
BS
1376 { \
1377 int i; \
1378 float_status s = env->vec_status; \
1379 \
1380 set_float_rounding_mode(rounding, &s); \
05ee3e8a
MCA
1381 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1382 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
64654ded
BS
1383 } \
1384 }
1385VRFI(n, float_round_nearest_even)
1386VRFI(m, float_round_down)
1387VRFI(p, float_round_up)
1388VRFI(z, float_round_to_zero)
1389#undef VRFI
1390
d15f74fb 1391void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1392{
1393 int i;
1394
05ee3e8a
MCA
1395 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1396 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
64654ded 1397
05ee3e8a 1398 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
64654ded
BS
1399 }
1400}
1401
02c74f0e
MF
1402#define VRLMI(name, size, element, insert) \
1403void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1404{ \
1405 int i; \
1406 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1407 uint##size##_t src1 = a->element[i]; \
1408 uint##size##_t src2 = b->element[i]; \
1409 uint##size##_t src3 = r->element[i]; \
1410 uint##size##_t begin, end, shift, mask, rot_val; \
1411 \
1412 shift = extract##size(src2, 0, 6); \
1413 end = extract##size(src2, 8, 6); \
1414 begin = extract##size(src2, 16, 6); \
1415 rot_val = rol##size(src1, shift); \
1416 mask = mask_u##size(begin, end); \
1417 if (insert) { \
1418 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1419 } else { \
1420 r->element[i] = (rot_val & mask); \
1421 } \
1422 } \
3e00884f
GS
1423}
1424
02c74f0e
MF
1425VRLMI(VRLDMI, 64, u64, 1);
1426VRLMI(VRLWMI, 32, u32, 1);
1427VRLMI(VRLDNM, 64, u64, 0);
1428VRLMI(VRLWNM, 32, u32, 0);
3e00884f 1429
d15f74fb 1430void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1431{
1432 int i;
1433
05ee3e8a
MCA
1434 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1435 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
64654ded
BS
1436 }
1437}
1438
d15f74fb 1439void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1440{
1441 int i;
1442
05ee3e8a
MCA
1443 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1444 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
64654ded
BS
1445 }
1446}
1447
f297c4c6
MF
1448#define VEXTU_X_DO(name, size, left) \
1449target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1450{ \
1451 int index = (a & 0xf) * 8; \
1452 if (left) { \
1453 index = 128 - index - size; \
1454 } \
1455 return int128_getlo(int128_rshift(b->s128, index)) & \
1456 MAKE_64BIT_MASK(0, size); \
1457}
60caf221
AK
1458VEXTU_X_DO(vextublx, 8, 1)
1459VEXTU_X_DO(vextuhlx, 16, 1)
1460VEXTU_X_DO(vextuwlx, 32, 1)
1461VEXTU_X_DO(vextubrx, 8, 0)
1462VEXTU_X_DO(vextuhrx, 16, 0)
1463VEXTU_X_DO(vextuwrx, 32, 0)
1464#undef VEXTU_X_DO
1465
5644a175
VAS
1466void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1467{
1468 int i;
1469 unsigned int shift, bytes, size;
1470
1471 size = ARRAY_SIZE(r->u8);
1472 for (i = 0; i < size; i++) {
63be02fc
AB
1473 shift = b->VsrB(i) & 0x7; /* extract shift value */
1474 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1475 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1476 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
5644a175
VAS
1477 }
1478}
1479
4004c1db
VAS
1480void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1481{
1482 int i;
1483 unsigned int shift, bytes;
1484
b6cb41b2
DG
1485 /*
1486 * Use reverse order, as destination and source register can be
1487 * same. Its being modified in place saving temporary, reverse
1488 * order will guarantee that computed result is not fed back.
4004c1db
VAS
1489 */
1490 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
63be02fc
AB
1491 shift = b->VsrB(i) & 0x7; /* extract shift value */
1492 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
4004c1db 1493 /* extract adjacent bytes */
63be02fc 1494 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
4004c1db
VAS
1495 }
1496}
1497
64654ded
BS
1498void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1499{
1500 int sh = shift & 0xf;
1501 int i;
1502 ppc_avr_t result;
1503
64654ded
BS
1504 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1505 int index = sh + i;
1506 if (index > 0xf) {
60594fea 1507 result.VsrB(i) = b->VsrB(index - 0x10);
64654ded 1508 } else {
60594fea 1509 result.VsrB(i) = a->VsrB(index);
64654ded
BS
1510 }
1511 }
64654ded
BS
1512 *r = result;
1513}
1514
1515void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1516{
3c385a93 1517 int sh = (b->VsrB(0xf) >> 3) & 0xf;
64654ded 1518
e03b5686 1519#if HOST_BIG_ENDIAN
64654ded 1520 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
b6cb41b2 1521 memset(&r->u8[16 - sh], 0, sh);
64654ded
BS
1522#else
1523 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1524 memset(&r->u8[0], 0, sh);
1525#endif
1526}
1527
e03b5686 1528#if HOST_BIG_ENDIAN
2cc12af3
MF
1529#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1530#else
1531#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1532#endif
1533
1534#define VINSX(SUFFIX, TYPE) \
1535void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1536 uint64_t val, target_ulong index) \
1537{ \
1538 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1539 target_long idx = index; \
1540 \
1541 if (idx < 0 || idx > maxidx) { \
1542 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1543 qemu_log_mask(LOG_GUEST_ERROR, \
1544 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1545 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1546 } else { \
1547 TYPE src = val; \
1548 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1549 } \
1550}
1551VINSX(B, uint8_t)
1552VINSX(H, uint16_t)
1553VINSX(W, uint32_t)
1554VINSX(D, uint64_t)
1555#undef ELEM_ADDR
1556#undef VINSX
e03b5686 1557#if HOST_BIG_ENDIAN
28110b72
MF
1558#define VEXTDVLX(NAME, SIZE) \
1559void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1560 target_ulong index) \
1561{ \
1562 const target_long idx = index; \
1563 ppc_avr_t tmp[2] = { *a, *b }; \
1564 memset(t, 0, sizeof(*t)); \
1565 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1566 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1567 } else { \
1568 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1569 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1570 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1571 } \
1572}
1573#else
1574#define VEXTDVLX(NAME, SIZE) \
1575void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1576 target_ulong index) \
1577{ \
1578 const target_long idx = index; \
1579 ppc_avr_t tmp[2] = { *b, *a }; \
1580 memset(t, 0, sizeof(*t)); \
1581 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1582 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1583 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1584 } else { \
1585 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1586 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1587 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1588 } \
1589}
1590#endif
1591VEXTDVLX(VEXTDUBVLX, 1)
1592VEXTDVLX(VEXTDUHVLX, 2)
1593VEXTDVLX(VEXTDUWVLX, 4)
1594VEXTDVLX(VEXTDDVLX, 8)
1595#undef VEXTDVLX
e03b5686 1596#if HOST_BIG_ENDIAN
b5d569a1
RS
1597#define VEXTRACT(suffix, element) \
1598 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1599 { \
1600 uint32_t es = sizeof(r->element[0]); \
1601 memmove(&r->u8[8 - es], &b->u8[index], es); \
1602 memset(&r->u8[8], 0, 8); \
1603 memset(&r->u8[0], 0, 8 - es); \
1604 }
1605#else
1606#define VEXTRACT(suffix, element) \
1607 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1608 { \
1609 uint32_t es = sizeof(r->element[0]); \
1610 uint32_t s = (16 - index) - es; \
1611 memmove(&r->u8[8], &b->u8[s], es); \
1612 memset(&r->u8[0], 0, 8); \
1613 memset(&r->u8[8 + es], 0, 8 - es); \
1614 }
1615#endif
1616VEXTRACT(ub, u8)
1617VEXTRACT(uh, u16)
1618VEXTRACT(uw, u32)
1619VEXTRACT(d, u64)
1620#undef VEXTRACT
64654ded 1621
fb5303cc
MF
1622#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1623uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1624{ \
1625 int i, idx, crf = 0; \
1626 \
1627 for (i = 0; i < NUM_ELEMS; i++) { \
1628 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1629 if (b->Vsr##ELEM(idx)) { \
1630 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1631 } else { \
1632 crf = 0b0010; \
1633 break; \
1634 } \
1635 } \
1636 \
1637 for (; i < NUM_ELEMS; i++) { \
1638 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1639 t->Vsr##ELEM(idx) = 0; \
1640 } \
1641 \
1642 return crf; \
1643}
1644VSTRI(VSTRIBL, B, 16, true)
1645VSTRI(VSTRIBR, B, 16, false)
1646VSTRI(VSTRIHL, H, 8, true)
1647VSTRI(VSTRIHR, H, 8, false)
1648#undef VSTRI
1649
5ba5335d
MCA
1650void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1651 ppc_vsr_t *xb, uint32_t index)
8ad901e5 1652{
03b32c09 1653 ppc_vsr_t t = { };
8ad901e5
ND
1654 size_t es = sizeof(uint32_t);
1655 uint32_t ext_index;
1656 int i;
1657
8ad901e5
ND
1658 ext_index = index;
1659 for (i = 0; i < es; i++, ext_index++) {
03b32c09 1660 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
8ad901e5 1661 }
8ad901e5 1662
03b32c09 1663 *xt = t;
8ad901e5
ND
1664}
1665
5ba5335d
MCA
1666void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1667 ppc_vsr_t *xb, uint32_t index)
3398b742 1668{
03b32c09 1669 ppc_vsr_t t = *xt;
3398b742
ND
1670 size_t es = sizeof(uint32_t);
1671 int ins_index, i = 0;
1672
3398b742
ND
1673 ins_index = index;
1674 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
03b32c09 1675 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
3398b742 1676 }
3398b742 1677
03b32c09 1678 *xt = t;
3398b742
ND
1679}
1680
1015fcab
MF
1681void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1682 uint32_t desc)
1683{
1684 /*
1685 * Instead of processing imm bit-by-bit, we'll skip the computation of
1686 * conjunctions whose corresponding bit is unset.
1687 */
1688 int bit, imm = simd_data(desc);
1689 Int128 conj, disj = int128_zero();
1690
1691 /* Iterate over set bits from the least to the most significant bit */
1692 while (imm) {
1693 /*
1694 * Get the next bit to be processed with ctz64. Invert the result of
1695 * ctz64 to match the indexing used by PowerISA.
1696 */
1697 bit = 7 - ctzl(imm);
1698 if (bit & 0x4) {
1699 conj = a->s128;
1700 } else {
1701 conj = int128_not(a->s128);
1702 }
1703 if (bit & 0x2) {
1704 conj = int128_and(conj, b->s128);
1705 } else {
1706 conj = int128_and(conj, int128_not(b->s128));
1707 }
1708 if (bit & 0x1) {
1709 conj = int128_and(conj, c->s128);
1710 } else {
1711 conj = int128_and(conj, int128_not(c->s128));
1712 }
1713 disj = int128_or(disj, conj);
1714
1715 /* Unset the least significant bit that is set */
1716 imm &= imm - 1;
1717 }
1718
1719 t->s128 = disj;
1720}
1721
788c6399
MF
1722#define XXBLEND(name, sz) \
1723void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1724 ppc_avr_t *c, uint32_t desc) \
1725{ \
1726 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1727 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1728 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1729 } \
1730}
1731XXBLEND(B, 8)
1732XXBLEND(H, 16)
1733XXBLEND(W, 32)
1734XXBLEND(D, 64)
1735#undef XXBLEND
1736
cc8b6e76
ND
1737#define VNEG(name, element) \
1738void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1739{ \
1740 int i; \
60594fea 1741 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
cc8b6e76
ND
1742 r->element[i] = -b->element[i]; \
1743 } \
1744}
1745VNEG(vnegw, s32)
1746VNEG(vnegd, s64)
1747#undef VNEG
1748
64654ded
BS
1749void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1750{
3c385a93 1751 int sh = (b->VsrB(0xf) >> 3) & 0xf;
64654ded 1752
e03b5686 1753#if HOST_BIG_ENDIAN
64654ded
BS
1754 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1755 memset(&r->u8[0], 0, sh);
1756#else
1757 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1758 memset(&r->u8[16 - sh], 0, sh);
1759#endif
1760}
1761
1762void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1763{
1764 int i;
1765
1766 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1767 r->u32[i] = a->u32[i] >= b->u32[i];
1768 }
1769}
1770
d15f74fb 1771void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1772{
1773 int64_t t;
1774 int i, upper;
1775 ppc_avr_t result;
1776 int sat = 0;
1777
60594fea
MCA
1778 upper = ARRAY_SIZE(r->s32) - 1;
1779 t = (int64_t)b->VsrSW(upper);
64654ded 1780 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
60594fea
MCA
1781 t += a->VsrSW(i);
1782 result.VsrSW(i) = 0;
64654ded 1783 }
60594fea 1784 result.VsrSW(upper) = cvtsdsw(t, &sat);
64654ded
BS
1785 *r = result;
1786
1787 if (sat) {
6175f5a0 1788 set_vscr_sat(env);
64654ded
BS
1789 }
1790}
1791
d15f74fb 1792void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1793{
1794 int i, j, upper;
1795 ppc_avr_t result;
1796 int sat = 0;
1797
64654ded 1798 upper = 1;
64654ded 1799 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
60594fea 1800 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
64654ded 1801
7fa0ddc1 1802 result.VsrD(i) = 0;
64654ded 1803 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
60594fea 1804 t += a->VsrSW(2 * i + j);
64654ded 1805 }
60594fea 1806 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
64654ded
BS
1807 }
1808
1809 *r = result;
1810 if (sat) {
6175f5a0 1811 set_vscr_sat(env);
64654ded
BS
1812 }
1813}
1814
d15f74fb 1815void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1816{
1817 int i, j;
1818 int sat = 0;
1819
1820 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1821 int64_t t = (int64_t)b->s32[i];
1822
1823 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1824 t += a->s8[4 * i + j];
1825 }
1826 r->s32[i] = cvtsdsw(t, &sat);
1827 }
1828
1829 if (sat) {
6175f5a0 1830 set_vscr_sat(env);
64654ded
BS
1831 }
1832}
1833
d15f74fb 1834void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1835{
1836 int sat = 0;
1837 int i;
1838
1839 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1840 int64_t t = (int64_t)b->s32[i];
1841
1842 t += a->s16[2 * i] + a->s16[2 * i + 1];
1843 r->s32[i] = cvtsdsw(t, &sat);
1844 }
1845
1846 if (sat) {
6175f5a0 1847 set_vscr_sat(env);
64654ded
BS
1848 }
1849}
1850
d15f74fb 1851void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1852{
1853 int i, j;
1854 int sat = 0;
1855
1856 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1857 uint64_t t = (uint64_t)b->u32[i];
1858
1859 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1860 t += a->u8[4 * i + j];
1861 }
1862 r->u32[i] = cvtuduw(t, &sat);
1863 }
1864
1865 if (sat) {
6175f5a0 1866 set_vscr_sat(env);
64654ded
BS
1867 }
1868}
1869
e03b5686 1870#if HOST_BIG_ENDIAN
64654ded
BS
1871#define UPKHI 1
1872#define UPKLO 0
1873#else
1874#define UPKHI 0
1875#define UPKLO 1
1876#endif
1877#define VUPKPX(suffix, hi) \
1878 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1879 { \
1880 int i; \
1881 ppc_avr_t result; \
1882 \
1883 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
b6cb41b2 1884 uint16_t e = b->u16[hi ? i : i + 4]; \
64654ded
BS
1885 uint8_t a = (e >> 15) ? 0xff : 0; \
1886 uint8_t r = (e >> 10) & 0x1f; \
1887 uint8_t g = (e >> 5) & 0x1f; \
1888 uint8_t b = e & 0x1f; \
1889 \
1890 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1891 } \
1892 *r = result; \
1893 }
1894VUPKPX(lpx, UPKLO)
1895VUPKPX(hpx, UPKHI)
1896#undef VUPKPX
1897
1898#define VUPK(suffix, unpacked, packee, hi) \
1899 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1900 { \
1901 int i; \
1902 ppc_avr_t result; \
1903 \
1904 if (hi) { \
1905 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1906 result.unpacked[i] = b->packee[i]; \
1907 } \
1908 } else { \
1909 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1910 i++) { \
1911 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1912 } \
1913 } \
1914 *r = result; \
1915 }
1916VUPK(hsb, s16, s8, UPKHI)
1917VUPK(hsh, s32, s16, UPKHI)
4430e076 1918VUPK(hsw, s64, s32, UPKHI)
64654ded
BS
1919VUPK(lsb, s16, s8, UPKLO)
1920VUPK(lsh, s32, s16, UPKLO)
4430e076 1921VUPK(lsw, s64, s32, UPKLO)
64654ded
BS
1922#undef VUPK
1923#undef UPKHI
1924#undef UPKLO
1925
f293f04a
TM
1926#define VGENERIC_DO(name, element) \
1927 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1928 { \
1929 int i; \
1930 \
60594fea 1931 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
f293f04a
TM
1932 r->element[i] = name(b->element[i]); \
1933 } \
1934 }
1935
1936#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1937#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
f293f04a
TM
1938
1939VGENERIC_DO(clzb, u8)
1940VGENERIC_DO(clzh, u16)
f293f04a
TM
1941
1942#undef clzb
1943#undef clzh
f293f04a 1944
a5ad8fbf
RS
1945#define ctzb(v) ((v) ? ctz32(v) : 8)
1946#define ctzh(v) ((v) ? ctz32(v) : 16)
1947#define ctzw(v) ctz32((v))
1948#define ctzd(v) ctz64((v))
1949
1950VGENERIC_DO(ctzb, u8)
1951VGENERIC_DO(ctzh, u16)
1952VGENERIC_DO(ctzw, u32)
1953VGENERIC_DO(ctzd, u64)
1954
1955#undef ctzb
1956#undef ctzh
1957#undef ctzw
1958#undef ctzd
1959
e13500b3
TM
1960#define popcntb(v) ctpop8(v)
1961#define popcnth(v) ctpop16(v)
1962#define popcntw(v) ctpop32(v)
1963#define popcntd(v) ctpop64(v)
1964
1965VGENERIC_DO(popcntb, u8)
1966VGENERIC_DO(popcnth, u16)
1967VGENERIC_DO(popcntw, u32)
1968VGENERIC_DO(popcntd, u64)
1969
1970#undef popcntb
1971#undef popcnth
1972#undef popcntw
1973#undef popcntd
f293f04a
TM
1974
1975#undef VGENERIC_DO
1976
/* Initializer for a ppc_avr_t holding the 128-bit integer value 1 */
#if !HOST_BIG_ENDIAN
#define QW_ONE { .u64 = { 1, 0 } }
#else
#define QW_ONE { .u64 = { 0, 1 } }
#endif
1982
1983#ifndef CONFIG_INT128
1984
1985static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1986{
1987 t->u64[0] = ~a.u64[0];
1988 t->u64[1] = ~a.u64[1];
1989}
1990
1991static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1992{
3c385a93 1993 if (a.VsrD(0) < b.VsrD(0)) {
b41da4eb 1994 return -1;
3c385a93 1995 } else if (a.VsrD(0) > b.VsrD(0)) {
b41da4eb 1996 return 1;
3c385a93 1997 } else if (a.VsrD(1) < b.VsrD(1)) {
b41da4eb 1998 return -1;
3c385a93 1999 } else if (a.VsrD(1) > b.VsrD(1)) {
b41da4eb
TM
2000 return 1;
2001 } else {
2002 return 0;
2003 }
2004}
2005
2006static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2007{
3c385a93
MCA
2008 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2009 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2010 (~a.VsrD(1) < b.VsrD(1));
b41da4eb
TM
2011}
2012
2013static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2014{
2015 ppc_avr_t not_a;
3c385a93
MCA
2016 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2017 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2018 (~a.VsrD(1) < b.VsrD(1));
b41da4eb
TM
2019 avr_qw_not(&not_a, a);
2020 return avr_qw_cmpu(not_a, b) < 0;
2021}
2022
2023#endif
2024
2025void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2026{
2027#ifdef CONFIG_INT128
2028 r->u128 = a->u128 + b->u128;
2029#else
2030 avr_qw_add(r, *a, *b);
2031#endif
2032}
2033
2034void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2035{
2036#ifdef CONFIG_INT128
2037 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2038#else
2039
3c385a93 2040 if (c->VsrD(1) & 1) {
b41da4eb
TM
2041 ppc_avr_t tmp;
2042
3c385a93
MCA
2043 tmp.VsrD(0) = 0;
2044 tmp.VsrD(1) = c->VsrD(1) & 1;
b41da4eb
TM
2045 avr_qw_add(&tmp, *a, tmp);
2046 avr_qw_add(r, tmp, *b);
2047 } else {
2048 avr_qw_add(r, *a, *b);
2049 }
2050#endif
2051}
2052
2053void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2054{
2055#ifdef CONFIG_INT128
2056 r->u128 = (~a->u128 < b->u128);
2057#else
2058 ppc_avr_t not_a;
2059
2060 avr_qw_not(&not_a, *a);
2061
3c385a93
MCA
2062 r->VsrD(0) = 0;
2063 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
b41da4eb
TM
2064#endif
2065}
2066
2067void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2068{
2069#ifdef CONFIG_INT128
2070 int carry_out = (~a->u128 < b->u128);
2071 if (!carry_out && (c->u128 & 1)) {
2072 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2073 ((a->u128 != 0) || (b->u128 != 0));
2074 }
2075 r->u128 = carry_out;
2076#else
2077
3c385a93 2078 int carry_in = c->VsrD(1) & 1;
b41da4eb
TM
2079 int carry_out = 0;
2080 ppc_avr_t tmp;
2081
2082 carry_out = avr_qw_addc(&tmp, *a, *b);
2083
2084 if (!carry_out && carry_in) {
2085 ppc_avr_t one = QW_ONE;
2086 carry_out = avr_qw_addc(&tmp, tmp, one);
2087 }
3c385a93
MCA
2088 r->VsrD(0) = 0;
2089 r->VsrD(1) = carry_out;
b41da4eb
TM
2090#endif
2091}
2092
2093void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2094{
2095#ifdef CONFIG_INT128
2096 r->u128 = a->u128 - b->u128;
2097#else
2098 ppc_avr_t tmp;
2099 ppc_avr_t one = QW_ONE;
2100
2101 avr_qw_not(&tmp, *b);
2102 avr_qw_add(&tmp, *a, tmp);
2103 avr_qw_add(r, tmp, one);
2104#endif
2105}
2106
2107void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2108{
2109#ifdef CONFIG_INT128
2110 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2111#else
2112 ppc_avr_t tmp, sum;
2113
2114 avr_qw_not(&tmp, *b);
2115 avr_qw_add(&sum, *a, tmp);
2116
3c385a93
MCA
2117 tmp.VsrD(0) = 0;
2118 tmp.VsrD(1) = c->VsrD(1) & 1;
b41da4eb
TM
2119 avr_qw_add(r, sum, tmp);
2120#endif
2121}
2122
2123void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2124{
2125#ifdef CONFIG_INT128
2126 r->u128 = (~a->u128 < ~b->u128) ||
2127 (a->u128 + ~b->u128 == (__uint128_t)-1);
2128#else
2129 int carry = (avr_qw_cmpu(*a, *b) > 0);
2130 if (!carry) {
2131 ppc_avr_t tmp;
2132 avr_qw_not(&tmp, *b);
2133 avr_qw_add(&tmp, *a, tmp);
3c385a93 2134 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
b41da4eb 2135 }
3c385a93
MCA
2136 r->VsrD(0) = 0;
2137 r->VsrD(1) = carry;
b41da4eb
TM
2138#endif
2139}
2140
2141void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2142{
2143#ifdef CONFIG_INT128
2144 r->u128 =
2145 (~a->u128 < ~b->u128) ||
2146 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2147#else
3c385a93 2148 int carry_in = c->VsrD(1) & 1;
b41da4eb
TM
2149 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2150 if (!carry_out && carry_in) {
2151 ppc_avr_t tmp;
2152 avr_qw_not(&tmp, *b);
2153 avr_qw_add(&tmp, *a, tmp);
3c385a93 2154 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
b41da4eb
TM
2155 }
2156
3c385a93
MCA
2157 r->VsrD(0) = 0;
2158 r->VsrD(1) = carry_out;
b41da4eb
TM
2159#endif
2160}
2161
e8f7b27b
TM
/* Sign codes stored in the lowest nibble of a packed decimal value */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_PLUS_ALT_2  0xE
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB

/* Sign characters of the national decimal format ('+' and '-') */
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* VsrB index of the byte that holds nibble n; nibbles 0 and 1 are in byte 15 */
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
e8f7b27b
TM
2172
2173static int bcd_get_sgn(ppc_avr_t *bcd)
2174{
428115c3 2175 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
e8f7b27b
TM
2176 case BCD_PLUS_PREF_1:
2177 case BCD_PLUS_PREF_2:
2178 case BCD_PLUS_ALT_1:
2179 case BCD_PLUS_ALT_2:
2180 {
2181 return 1;
2182 }
2183
2184 case BCD_NEG_PREF:
2185 case BCD_NEG_ALT:
2186 {
2187 return -1;
2188 }
2189
2190 default:
2191 {
2192 return 0;
2193 }
2194 }
2195}
2196
2197static int bcd_preferred_sgn(int sgn, int ps)
2198{
2199 if (sgn >= 0) {
2200 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2201 } else {
2202 return BCD_NEG_PREF;
2203 }
2204}
2205
2206static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2207{
2208 uint8_t result;
2209 if (n & 1) {
428115c3 2210 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
e8f7b27b 2211 } else {
428115c3 2212 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
e8f7b27b
TM
2213 }
2214
2215 if (unlikely(result > 9)) {
2216 *invalid = true;
2217 }
2218 return result;
2219}
2220
2221static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2222{
2223 if (n & 1) {
428115c3
MCA
2224 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2225 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
e8f7b27b 2226 } else {
428115c3
MCA
2227 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2228 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
e8f7b27b
TM
2229 }
2230}
2231
071663df
JRZ
2232static bool bcd_is_valid(ppc_avr_t *bcd)
2233{
2234 int i;
2235 int invalid = 0;
2236
2237 if (bcd_get_sgn(bcd) == 0) {
2238 return false;
2239 }
2240
2241 for (i = 1; i < 32; i++) {
2242 bcd_get_digit(bcd, i, &invalid);
2243 if (unlikely(invalid)) {
2244 return false;
2245 }
2246 }
2247 return true;
2248}
2249
b8155872
JRZ
2250static int bcd_cmp_zero(ppc_avr_t *bcd)
2251{
3c385a93 2252 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
efa73196 2253 return CRF_EQ;
b8155872 2254 } else {
efa73196 2255 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
b8155872
JRZ
2256 }
2257}
2258
2259static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2260{
60594fea 2261 return reg->VsrH(7 - n);
b8155872
JRZ
2262}
2263
e2106d73
JRZ
2264static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2265{
60594fea 2266 reg->VsrH(7 - n) = val;
e2106d73
JRZ
2267}
2268
e8f7b27b
TM
2269static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2270{
2271 int i;
2272 int invalid = 0;
2273 for (i = 31; i > 0; i--) {
2274 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2275 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2276 if (unlikely(invalid)) {
3b163b01 2277 return 0; /* doesn't matter */
e8f7b27b
TM
2278 } else if (dig_a > dig_b) {
2279 return 1;
2280 } else if (dig_a < dig_b) {
2281 return -1;
2282 }
2283 }
2284
2285 return 0;
2286}
2287
936fda4d 2288static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
e8f7b27b
TM
2289 int *overflow)
2290{
2291 int carry = 0;
2292 int i;
936fda4d
FR
2293 int is_zero = 1;
2294
e8f7b27b
TM
2295 for (i = 1; i <= 31; i++) {
2296 uint8_t digit = bcd_get_digit(a, i, invalid) +
2297 bcd_get_digit(b, i, invalid) + carry;
936fda4d 2298 is_zero &= (digit == 0);
e8f7b27b
TM
2299 if (digit > 9) {
2300 carry = 1;
2301 digit -= 10;
2302 } else {
2303 carry = 0;
2304 }
2305
2306 bcd_put_digit(t, digit, i);
e8f7b27b
TM
2307 }
2308
2309 *overflow = carry;
936fda4d 2310 return is_zero;
e8f7b27b
TM
2311}
2312
d03b174a 2313static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
e8f7b27b
TM
2314 int *overflow)
2315{
2316 int carry = 0;
2317 int i;
d03b174a 2318
e8f7b27b
TM
2319 for (i = 1; i <= 31; i++) {
2320 uint8_t digit = bcd_get_digit(a, i, invalid) -
2321 bcd_get_digit(b, i, invalid) + carry;
e8f7b27b
TM
2322 if (digit & 0x80) {
2323 carry = -1;
2324 digit += 10;
2325 } else {
2326 carry = 0;
2327 }
2328
2329 bcd_put_digit(t, digit, i);
e8f7b27b
TM
2330 }
2331
2332 *overflow = carry;
e8f7b27b
TM
2333}
2334
2335uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2336{
2337
2338 int sgna = bcd_get_sgn(a);
2339 int sgnb = bcd_get_sgn(b);
2340 int invalid = (sgna == 0) || (sgnb == 0);
2341 int overflow = 0;
936fda4d 2342 int zero = 0;
e8f7b27b
TM
2343 uint32_t cr = 0;
2344 ppc_avr_t result = { .u64 = { 0, 0 } };
2345
2346 if (!invalid) {
2347 if (sgna == sgnb) {
428115c3 2348 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
936fda4d
FR
2349 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2350 cr = (sgna > 0) ? CRF_GT : CRF_LT;
e8f7b27b 2351 } else {
d03b174a
YB
2352 int magnitude = bcd_cmp_mag(a, b);
2353 if (magnitude > 0) {
428115c3 2354 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
d03b174a
YB
2355 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2356 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2357 } else if (magnitude < 0) {
428115c3 2358 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
d03b174a
YB
2359 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2360 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2361 } else {
428115c3 2362 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
d03b174a
YB
2363 cr = CRF_EQ;
2364 }
e8f7b27b
TM
2365 }
2366 }
2367
2368 if (unlikely(invalid)) {
3c385a93 2369 result.VsrD(0) = result.VsrD(1) = -1;
efa73196 2370 cr = CRF_SO;
e8f7b27b 2371 } else if (overflow) {
efa73196 2372 cr |= CRF_SO;
936fda4d
FR
2373 } else if (zero) {
2374 cr |= CRF_EQ;
e8f7b27b
TM
2375 }
2376
2377 *r = result;
2378
2379 return cr;
2380}
2381
2382uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2383{
2384 ppc_avr_t bcopy = *b;
2385 int sgnb = bcd_get_sgn(b);
2386 if (sgnb < 0) {
2387 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2388 } else if (sgnb > 0) {
2389 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2390 }
2391 /* else invalid ... defer to bcdadd code for proper handling */
2392
2393 return helper_bcdadd(r, a, &bcopy, ps);
2394}
f293f04a 2395
b8155872
JRZ
2396uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2397{
2398 int i;
2399 int cr = 0;
2400 uint16_t national = 0;
2401 uint16_t sgnb = get_national_digit(b, 0);
2402 ppc_avr_t ret = { .u64 = { 0, 0 } };
2403 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2404
2405 for (i = 1; i < 8; i++) {
2406 national = get_national_digit(b, i);
2407 if (unlikely(national < 0x30 || national > 0x39)) {
2408 invalid = 1;
2409 break;
2410 }
2411
2412 bcd_put_digit(&ret, national & 0xf, i);
2413 }
2414
2415 if (sgnb == NATIONAL_PLUS) {
2416 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2417 } else {
2418 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2419 }
2420
2421 cr = bcd_cmp_zero(&ret);
2422
2423 if (unlikely(invalid)) {
efa73196 2424 cr = CRF_SO;
b8155872
JRZ
2425 }
2426
2427 *r = ret;
2428
2429 return cr;
2430}
2431
e2106d73
JRZ
2432uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2433{
2434 int i;
2435 int cr = 0;
2436 int sgnb = bcd_get_sgn(b);
2437 int invalid = (sgnb == 0);
2438 ppc_avr_t ret = { .u64 = { 0, 0 } };
2439
3c385a93 2440 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
e2106d73
JRZ
2441
2442 for (i = 1; i < 8; i++) {
2443 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2444
2445 if (unlikely(invalid)) {
2446 break;
2447 }
2448 }
2449 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2450
2451 cr = bcd_cmp_zero(b);
2452
2453 if (ox_flag) {
efa73196 2454 cr |= CRF_SO;
e2106d73
JRZ
2455 }
2456
2457 if (unlikely(invalid)) {
efa73196 2458 cr = CRF_SO;
e2106d73
JRZ
2459 }
2460
2461 *r = ret;
2462
2463 return cr;
2464}
2465
38f4cb04
JRZ
2466uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2467{
2468 int i;
2469 int cr = 0;
2470 int invalid = 0;
2471 int zone_digit = 0;
2472 int zone_lead = ps ? 0xF : 0x3;
2473 int digit = 0;
2474 ppc_avr_t ret = { .u64 = { 0, 0 } };
428115c3 2475 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
38f4cb04
JRZ
2476
2477 if (unlikely((sgnb < 0xA) && ps)) {
2478 invalid = 1;
2479 }
2480
2481 for (i = 0; i < 16; i++) {
428115c3
MCA
2482 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2483 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
38f4cb04
JRZ
2484 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2485 invalid = 1;
2486 break;
2487 }
2488
2489 bcd_put_digit(&ret, digit, i + 1);
2490 }
2491
2492 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2493 (!ps && (sgnb & 0x4))) {
2494 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2495 } else {
2496 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2497 }
2498
2499 cr = bcd_cmp_zero(&ret);
2500
2501 if (unlikely(invalid)) {
efa73196 2502 cr = CRF_SO;
38f4cb04
JRZ
2503 }
2504
2505 *r = ret;
2506
2507 return cr;
2508}
2509
0a890b31
JRZ
2510uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2511{
2512 int i;
2513 int cr = 0;
2514 uint8_t digit = 0;
2515 int sgnb = bcd_get_sgn(b);
2516 int zone_lead = (ps) ? 0xF0 : 0x30;
2517 int invalid = (sgnb == 0);
2518 ppc_avr_t ret = { .u64 = { 0, 0 } };
2519
3c385a93 2520 int ox_flag = ((b->VsrD(0) >> 4) != 0);
0a890b31
JRZ
2521
2522 for (i = 0; i < 16; i++) {
2523 digit = bcd_get_digit(b, i + 1, &invalid);
2524
2525 if (unlikely(invalid)) {
2526 break;
2527 }
2528
428115c3 2529 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
0a890b31
JRZ
2530 }
2531
2532 if (ps) {
2533 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2534 } else {
2535 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2536 }
2537
2538 cr = bcd_cmp_zero(b);
2539
2540 if (ox_flag) {
efa73196 2541 cr |= CRF_SO;
0a890b31
JRZ
2542 }
2543
2544 if (unlikely(invalid)) {
efa73196 2545 cr = CRF_SO;
0a890b31
JRZ
2546 }
2547
2548 *r = ret;
2549
2550 return cr;
2551}
2552
a3d67f3e
LP
/**
 * Three-way comparison of two unsigned 128-bit integers, each supplied as a
 * (low, high) pair of 64-bit halves.
 *
 * Returns:
 *   1 if ahi|alo > bhi|blo,
 *   0 if ahi|alo == bhi|blo,
 *  -1 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    /* The high halves decide unless they are equal. */
    if (ahi != bhi) {
        return (ahi > bhi) ? 1 : -1;
    }
    if (alo != blo) {
        return (alo > blo) ? 1 : -1;
    }
    return 0;
}
2568
a406c058
JRZ
2569uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2570{
2571 int i;
a3d67f3e 2572 int cr;
a406c058
JRZ
2573 uint64_t lo_value;
2574 uint64_t hi_value;
40f3e79a 2575 uint64_t rem;
a406c058
JRZ
2576 ppc_avr_t ret = { .u64 = { 0, 0 } };
2577
3c385a93
MCA
2578 if (b->VsrSD(0) < 0) {
2579 lo_value = -b->VsrSD(1);
2580 hi_value = ~b->VsrD(0) + !lo_value;
a406c058 2581 bcd_put_digit(&ret, 0xD, 0);
a3d67f3e
LP
2582
2583 cr = CRF_LT;
a406c058 2584 } else {
3c385a93
MCA
2585 lo_value = b->VsrD(1);
2586 hi_value = b->VsrD(0);
a406c058 2587 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
a406c058 2588
a3d67f3e
LP
2589 if (hi_value == 0 && lo_value == 0) {
2590 cr = CRF_EQ;
2591 } else {
2592 cr = CRF_GT;
2593 }
a406c058
JRZ
2594 }
2595
a3d67f3e
LP
2596 /*
2597 * Check src limits: abs(src) <= 10^31 - 1
2598 *
2599 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2600 */
2601 if (ucmp128(lo_value, hi_value,
2602 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2603 cr |= CRF_SO;
a406c058 2604
a3d67f3e
LP
2605 /*
2606 * According to the ISA, if src wouldn't fit in the destination
2607 * register, the result is undefined.
2608 * In that case, we leave r unchanged.
2609 */
2610 } else {
40f3e79a 2611 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
a406c058 2612
40f3e79a
LP
2613 for (i = 1; i < 16; rem /= 10, i++) {
2614 bcd_put_digit(&ret, rem % 10, i);
a3d67f3e 2615 }
a406c058 2616
a3d67f3e
LP
2617 for (; i < 32; lo_value /= 10, i++) {
2618 bcd_put_digit(&ret, lo_value % 10, i);
2619 }
2620
2621 *r = ret;
2622 }
a406c058
JRZ
2623
2624 return cr;
2625}
2626
c85bc7dd
JRZ
2627uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2628{
2629 uint8_t i;
2630 int cr;
2631 uint64_t carry;
2632 uint64_t unused;
2633 uint64_t lo_value;
2634 uint64_t hi_value = 0;
2635 int sgnb = bcd_get_sgn(b);
2636 int invalid = (sgnb == 0);
2637
2638 lo_value = bcd_get_digit(b, 31, &invalid);
2639 for (i = 30; i > 0; i--) {
2640 mulu64(&lo_value, &carry, lo_value, 10ULL);
2641 mulu64(&hi_value, &unused, hi_value, 10ULL);
2642 lo_value += bcd_get_digit(b, i, &invalid);
2643 hi_value += carry;
2644
2645 if (unlikely(invalid)) {
2646 break;
2647 }
2648 }
2649
2650 if (sgnb == -1) {
3c385a93
MCA
2651 r->VsrSD(1) = -lo_value;
2652 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
c85bc7dd 2653 } else {
3c385a93
MCA
2654 r->VsrSD(1) = lo_value;
2655 r->VsrSD(0) = hi_value;
c85bc7dd
JRZ
2656 }
2657
2658 cr = bcd_cmp_zero(b);
2659
2660 if (unlikely(invalid)) {
2661 cr = CRF_SO;
2662 }
2663
2664 return cr;
2665}
2666
c3025c3b
JRZ
2667uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2668{
2669 int i;
2670 int invalid = 0;
2671
2672 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2673 return CRF_SO;
2674 }
2675
2676 *r = *a;
428115c3 2677 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
c3025c3b
JRZ
2678
2679 for (i = 1; i < 32; i++) {
2680 bcd_get_digit(a, i, &invalid);
2681 bcd_get_digit(b, i, &invalid);
2682 if (unlikely(invalid)) {
2683 return CRF_SO;
2684 }
2685 }
2686
2687 return bcd_cmp_zero(r);
2688}
2689
466a3f9c
JRZ
2690uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2691{
466a3f9c
JRZ
2692 int sgnb = bcd_get_sgn(b);
2693
2694 *r = *b;
2695 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2696
071663df
JRZ
2697 if (bcd_is_valid(b) == false) {
2698 return CRF_SO;
466a3f9c
JRZ
2699 }
2700
2701 return bcd_cmp_zero(r);
2702}
2703
e04797f7
JRZ
2704uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2705{
2706 int cr;
428115c3 2707 int i = a->VsrSB(7);
e04797f7
JRZ
2708 bool ox_flag = false;
2709 int sgnb = bcd_get_sgn(b);
2710 ppc_avr_t ret = *b;
3c385a93 2711 ret.VsrD(1) &= ~0xf;
e04797f7
JRZ
2712
2713 if (bcd_is_valid(b) == false) {
2714 return CRF_SO;
2715 }
2716
2717 if (unlikely(i > 31)) {
2718 i = 31;
2719 } else if (unlikely(i < -31)) {
2720 i = -31;
2721 }
2722
2723 if (i > 0) {
3c385a93 2724 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
e04797f7 2725 } else {
3c385a93 2726 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
e04797f7
JRZ
2727 }
2728 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2729
2730 *r = ret;
2731
2732 cr = bcd_cmp_zero(r);
2733 if (ox_flag) {
2734 cr |= CRF_SO;
2735 }
2736
2737 return cr;
2738}
2739
a49a95e9
JRZ
2740uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2741{
2742 int cr;
2743 int i;
2744 int invalid = 0;
2745 bool ox_flag = false;
2746 ppc_avr_t ret = *b;
2747
2748 for (i = 0; i < 32; i++) {
2749 bcd_get_digit(b, i, &invalid);
2750
2751 if (unlikely(invalid)) {
2752 return CRF_SO;
2753 }
2754 }
2755
428115c3 2756 i = a->VsrSB(7);
a49a95e9
JRZ
2757 if (i >= 32) {
2758 ox_flag = true;
3c385a93 2759 ret.VsrD(1) = ret.VsrD(0) = 0;
a49a95e9 2760 } else if (i <= -32) {
3c385a93 2761 ret.VsrD(1) = ret.VsrD(0) = 0;
a49a95e9 2762 } else if (i > 0) {
3c385a93 2763 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
a49a95e9 2764 } else {
3c385a93 2765 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
a49a95e9
JRZ
2766 }
2767 *r = ret;
2768
2769 cr = bcd_cmp_zero(r);
2770 if (ox_flag) {
2771 cr |= CRF_SO;
2772 }
2773
2774 return cr;
2775}
2776
a54238ad
JRZ
2777uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2778{
2779 int cr;
2780 int unused = 0;
2781 int invalid = 0;
2782 bool ox_flag = false;
2783 int sgnb = bcd_get_sgn(b);
2784 ppc_avr_t ret = *b;
3c385a93 2785 ret.VsrD(1) &= ~0xf;
a54238ad 2786
428115c3
MCA
2787 int i = a->VsrSB(7);
2788 ppc_avr_t bcd_one;
2789
2790 bcd_one.VsrD(0) = 0;
2791 bcd_one.VsrD(1) = 0x10;
a54238ad
JRZ
2792
2793 if (bcd_is_valid(b) == false) {
2794 return CRF_SO;
2795 }
2796
2797 if (unlikely(i > 31)) {
2798 i = 31;
2799 } else if (unlikely(i < -31)) {
2800 i = -31;
2801 }
2802
2803 if (i > 0) {
3c385a93 2804 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
a54238ad 2805 } else {
3c385a93 2806 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
a54238ad
JRZ
2807
2808 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2809 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2810 }
2811 }
2812 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2813
2814 cr = bcd_cmp_zero(&ret);
2815 if (ox_flag) {
2816 cr |= CRF_SO;
2817 }
2818 *r = ret;
2819
2820 return cr;
2821}
2822
31bc4d11
JRZ
2823uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2824{
2825 uint64_t mask;
2826 uint32_t ox_flag = 0;
428115c3 2827 int i = a->VsrSH(3) + 1;
31bc4d11
JRZ
2828 ppc_avr_t ret = *b;
2829
2830 if (bcd_is_valid(b) == false) {
2831 return CRF_SO;
2832 }
2833
2834 if (i > 16 && i < 32) {
2835 mask = (uint64_t)-1 >> (128 - i * 4);
3c385a93 2836 if (ret.VsrD(0) & ~mask) {
31bc4d11
JRZ
2837 ox_flag = CRF_SO;
2838 }
2839
3c385a93 2840 ret.VsrD(0) &= mask;
31bc4d11
JRZ
2841 } else if (i >= 0 && i <= 16) {
2842 mask = (uint64_t)-1 >> (64 - i * 4);
3c385a93 2843 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
31bc4d11
JRZ
2844 ox_flag = CRF_SO;
2845 }
2846
3c385a93
MCA
2847 ret.VsrD(1) &= mask;
2848 ret.VsrD(0) = 0;
31bc4d11
JRZ
2849 }
2850 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2851 *r = ret;
2852
2853 return bcd_cmp_zero(&ret) | ox_flag;
2854}
2855
5c32e2e4
JRZ
2856uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2857{
2858 int i;
2859 uint64_t mask;
2860 uint32_t ox_flag = 0;
2861 int invalid = 0;
2862 ppc_avr_t ret = *b;
2863
2864 for (i = 0; i < 32; i++) {
2865 bcd_get_digit(b, i, &invalid);
2866
2867 if (unlikely(invalid)) {
2868 return CRF_SO;
2869 }
2870 }
2871
428115c3 2872 i = a->VsrSH(3);
5c32e2e4
JRZ
2873 if (i > 16 && i < 33) {
2874 mask = (uint64_t)-1 >> (128 - i * 4);
3c385a93 2875 if (ret.VsrD(0) & ~mask) {
5c32e2e4
JRZ
2876 ox_flag = CRF_SO;
2877 }
2878
3c385a93 2879 ret.VsrD(0) &= mask;
5c32e2e4
JRZ
2880 } else if (i > 0 && i <= 16) {
2881 mask = (uint64_t)-1 >> (64 - i * 4);
3c385a93 2882 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
5c32e2e4
JRZ
2883 ox_flag = CRF_SO;
2884 }
2885
3c385a93
MCA
2886 ret.VsrD(1) &= mask;
2887 ret.VsrD(0) = 0;
5c32e2e4 2888 } else if (i == 0) {
3c385a93 2889 if (ret.VsrD(0) || ret.VsrD(1)) {
5c32e2e4
JRZ
2890 ox_flag = CRF_SO;
2891 }
3c385a93 2892 ret.VsrD(0) = ret.VsrD(1) = 0;
5c32e2e4
JRZ
2893 }
2894
2895 *r = ret;
3c385a93 2896 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
5c32e2e4
JRZ
2897 return ox_flag | CRF_EQ;
2898 }
2899
2900 return ox_flag | CRF_GT;
2901}
2902
c1542453 2903void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
557d52fa
TM
2904{
2905 int i;
2906 VECTOR_FOR_INORDER_I(i, u8) {
c1542453 2907 r->u8[i] = AES_sbox[a->u8[i]];
557d52fa
TM
2908 }
2909}
2910
c1542453 2911void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
557d52fa 2912{
65cf1f65 2913 ppc_avr_t result;
557d52fa 2914 int i;
557d52fa 2915
c1542453 2916 VECTOR_FOR_INORDER_I(i, u32) {
2dea57db
MCA
2917 result.VsrW(i) = b->VsrW(i) ^
2918 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2919 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2920 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2921 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
557d52fa 2922 }
65cf1f65 2923 *r = result;
557d52fa
TM
2924}
2925
557d52fa
TM
2926void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2927{
65cf1f65 2928 ppc_avr_t result;
c1542453
TM
2929 int i;
2930
2931 VECTOR_FOR_INORDER_I(i, u8) {
2dea57db 2932 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
c1542453 2933 }
65cf1f65 2934 *r = result;
557d52fa
TM
2935}
2936
2937void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2938{
2939 /* This differs from what is written in ISA V2.07. The RTL is */
2940 /* incorrect and will be fixed in V2.07B. */
c1542453
TM
2941 int i;
2942 ppc_avr_t tmp;
2943
2944 VECTOR_FOR_INORDER_I(i, u8) {
2dea57db 2945 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
c1542453
TM
2946 }
2947
2948 VECTOR_FOR_INORDER_I(i, u32) {
2dea57db
MCA
2949 r->VsrW(i) =
2950 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2951 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2952 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2953 AES_imc[tmp.VsrB(4 * i + 3)][3];
c1542453 2954 }
557d52fa
TM
2955}
2956
2957void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2958{
65cf1f65 2959 ppc_avr_t result;
c1542453
TM
2960 int i;
2961
2962 VECTOR_FOR_INORDER_I(i, u8) {
2dea57db 2963 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
c1542453 2964 }
65cf1f65 2965 *r = result;
557d52fa
TM
2966}
2967
57354f8f
TM
2968void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2969{
2970 int st = (st_six & 0x10) != 0;
2971 int six = st_six & 0xF;
2972 int i;
2973
730d2ca3 2974 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
57354f8f
TM
2975 if (st == 0) {
2976 if ((six & (0x8 >> i)) == 0) {
0ef83bf2
MCA
2977 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2978 ror32(a->VsrW(i), 18) ^
730d2ca3 2979 (a->VsrW(i) >> 3);
57354f8f 2980 } else { /* six.bit[i] == 1 */
0ef83bf2
MCA
2981 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2982 ror32(a->VsrW(i), 19) ^
730d2ca3 2983 (a->VsrW(i) >> 10);
57354f8f
TM
2984 }
2985 } else { /* st == 1 */
2986 if ((six & (0x8 >> i)) == 0) {
0ef83bf2
MCA
2987 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2988 ror32(a->VsrW(i), 13) ^
2989 ror32(a->VsrW(i), 22);
57354f8f 2990 } else { /* six.bit[i] == 1 */
0ef83bf2
MCA
2991 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2992 ror32(a->VsrW(i), 11) ^
2993 ror32(a->VsrW(i), 25);
57354f8f
TM
2994 }
2995 }
2996 }
2997}
2998
57354f8f
TM
2999void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3000{
3001 int st = (st_six & 0x10) != 0;
3002 int six = st_six & 0xF;
3003 int i;
3004
730d2ca3 3005 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
57354f8f 3006 if (st == 0) {
b6cb41b2 3007 if ((six & (0x8 >> (2 * i))) == 0) {
0ef83bf2
MCA
3008 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3009 ror64(a->VsrD(i), 8) ^
730d2ca3 3010 (a->VsrD(i) >> 7);
57354f8f 3011 } else { /* six.bit[2*i] == 1 */
0ef83bf2
MCA
3012 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3013 ror64(a->VsrD(i), 61) ^
730d2ca3 3014 (a->VsrD(i) >> 6);
57354f8f
TM
3015 }
3016 } else { /* st == 1 */
b6cb41b2 3017 if ((six & (0x8 >> (2 * i))) == 0) {
0ef83bf2
MCA
3018 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3019 ror64(a->VsrD(i), 34) ^
3020 ror64(a->VsrD(i), 39);
57354f8f 3021 } else { /* six.bit[2*i] == 1 */
0ef83bf2
MCA
3022 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3023 ror64(a->VsrD(i), 18) ^
3024 ror64(a->VsrD(i), 41);
57354f8f
TM
3025 }
3026 }
3027 }
3028}
3029
ac174549
TM
3030void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3031{
65cf1f65 3032 ppc_avr_t result;
ac174549 3033 int i;
65cf1f65 3034
60594fea
MCA
3035 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3036 int indexA = c->VsrB(i) >> 4;
3037 int indexB = c->VsrB(i) & 0xF;
3038
3039 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
ac174549 3040 }
65cf1f65 3041 *r = result;
ac174549
TM
3042}
3043
64654ded 3044#undef VECTOR_FOR_INORDER_I
64654ded
BS
3045
3046/*****************************************************************************/
3047/* SPE extension helpers */
3048/* Use a table to make this quicker */
/* Bit-reversed value of every 4-bit nibble. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of an 8-bit value using the nibble table. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit value: the bytes swap places and each
 * byte is bit-reversed.
 *
 * Each reversed byte is widened to uint32_t before shifting; otherwise it
 * would be promoted to (signed) int and a value >= 0x80 shifted left by 24
 * would overflow into the sign bit.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
3064
3065#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3066target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3067{
3068 uint32_t a, b, d, mask;
3069
3070 mask = UINT32_MAX >> (32 - MASKBITS);
3071 a = arg1 & mask;
3072 b = arg2 & mask;
3073 d = word_reverse(1 + word_reverse(a | ~b));
3074 return (arg1 & ~mask) | (d & b);
3075}
3076
/*
 * Count the leading bits of @val that equal its most significant bit:
 * when the top bit is set the value is inverted first, then clz32() is
 * applied.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
3085
/* Count leading zero bits of @val; a thin wrapper around clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
3090
3091/* 440 specific */
d15f74fb
BS
3092target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3093 target_ulong low, uint32_t update_Rc)
64654ded
BS
3094{
3095 target_ulong mask;
3096 int i;
3097
3098 i = 1;
3099 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3100 if ((high & mask) == 0) {
3101 if (update_Rc) {
3102 env->crf[0] = 0x4;
3103 }
3104 goto done;
3105 }
3106 i++;
3107 }
3108 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3109 if ((low & mask) == 0) {
3110 if (update_Rc) {
3111 env->crf[0] = 0x8;
3112 }
3113 goto done;
3114 }
3115 i++;
3116 }
ebbd8b40 3117 i = 8;
64654ded
BS
3118 if (update_Rc) {
3119 env->crf[0] = 0x2;
3120 }
3121 done:
3122 env->xer = (env->xer & ~0x7F) | i;
3123 if (update_Rc) {
3124 env->crf[0] |= xer_so;
3125 }
3126 return i;
3127}