]> git.proxmox.com Git - mirror_qemu.git/blame - target/ppc/int_helper.c
target/ppc: Use clmul_64
[mirror_qemu.git] / target / ppc / int_helper.c
CommitLineData
64654ded
BS
1/*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
6bd039cd 9 * version 2.1 of the License, or (at your option) any later version.
64654ded
BS
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
db725815 19
0d75590d 20#include "qemu/osdep.h"
64654ded 21#include "cpu.h"
3e00884f 22#include "internal.h"
1de7afc9 23#include "qemu/host-utils.h"
8a05fd9a 24#include "qemu/log.h"
2ef6175a 25#include "exec/helper-proto.h"
6f2945cd 26#include "crypto/aes.h"
7df34e48 27#include "crypto/aes-round.h"
cec4090d 28#include "crypto/clmul.h"
24f91e81 29#include "fpu/softfloat.h"
3f74b632
RH
30#include "qapi/error.h"
31#include "qemu/guest-random.h"
1015fcab 32#include "tcg/tcg-gvec-desc.h"
64654ded
BS
33
34#include "helper_regs.h"
35/*****************************************************************************/
36/* Fixed point operations helpers */
64654ded 37
f32899de
ND
38static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
39{
40 if (unlikely(ov)) {
af721a31 41 env->so = env->ov = env->ov32 = 1;
f32899de 42 } else {
af721a31 43 env->ov = env->ov32 = 0;
f32899de
ND
44 }
45}
46
6a4fda33
TM
47target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
48 uint32_t oe)
49{
50 uint64_t rt = 0;
51 int overflow = 0;
52
53 uint64_t dividend = (uint64_t)ra << 32;
54 uint64_t divisor = (uint32_t)rb;
55
56 if (unlikely(divisor == 0)) {
57 overflow = 1;
58 } else {
59 rt = dividend / divisor;
60 overflow = rt > UINT32_MAX;
61 }
62
63 if (unlikely(overflow)) {
64 rt = 0; /* Undefined */
65 }
66
67 if (oe) {
f32899de 68 helper_update_ov_legacy(env, overflow);
6a4fda33
TM
69 }
70
71 return (target_ulong)rt;
72}
73
a98eb9e9
TM
74target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
75 uint32_t oe)
76{
77 int64_t rt = 0;
78 int overflow = 0;
79
80 int64_t dividend = (int64_t)ra << 32;
81 int64_t divisor = (int64_t)((int32_t)rb);
82
83 if (unlikely((divisor == 0) ||
84 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
85 overflow = 1;
86 } else {
87 rt = dividend / divisor;
88 overflow = rt != (int32_t)rt;
89 }
90
91 if (unlikely(overflow)) {
92 rt = 0; /* Undefined */
93 }
94
95 if (oe) {
f32899de 96 helper_update_ov_legacy(env, overflow);
a98eb9e9
TM
97 }
98
99 return (target_ulong)rt;
100}
101
98d1eb27
TM
102#if defined(TARGET_PPC64)
103
104uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
105{
106 uint64_t rt = 0;
107 int overflow = 0;
108
9276a31c
LP
109 if (unlikely(rb == 0 || ra >= rb)) {
110 overflow = 1;
98d1eb27 111 rt = 0; /* Undefined */
9276a31c
LP
112 } else {
113 divu128(&rt, &ra, rb);
98d1eb27
TM
114 }
115
116 if (oe) {
f32899de 117 helper_update_ov_legacy(env, overflow);
98d1eb27
TM
118 }
119
120 return rt;
121}
122
e44259b6
TM
123uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
124{
40f3e79a 125 uint64_t rt = 0;
e44259b6
TM
126 int64_t ra = (int64_t)rau;
127 int64_t rb = (int64_t)rbu;
9276a31c 128 int overflow = 0;
e44259b6 129
9276a31c
LP
130 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
131 overflow = 1;
e44259b6 132 rt = 0; /* Undefined */
9276a31c
LP
133 } else {
134 divs128(&rt, &ra, rb);
e44259b6
TM
135 }
136
137 if (oe) {
f32899de 138 helper_update_ov_legacy(env, overflow);
e44259b6
TM
139 }
140
141 return rt;
142}
143
98d1eb27
TM
144#endif
145
146
64654ded 147#if defined(TARGET_PPC64)
082ce330
ND
148/* if x = 0xab, returns 0xababababababababa */
149#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
150
b6cb41b2
DG
151/*
152 * subtract 1 from each byte, and with inverse, check if MSB is set at each
082ce330
ND
153 * byte.
154 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
155 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
156 */
157#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
158
159/* When you XOR the pattern and there is a match, that byte will be zero */
160#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
161
162uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
163{
efa73196 164 return hasvalue(rb, ra) ? CRF_GT : 0;
082ce330
ND
165}
166
167#undef pattern
168#undef haszero
169#undef hasvalue
170
b6cb41b2 171/*
3f74b632 172 * Return a random number.
fec5c62a 173 */
3f74b632 174uint64_t helper_darn32(void)
fec5c62a 175{
3f74b632
RH
176 Error *err = NULL;
177 uint32_t ret;
178
179 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
180 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
181 error_get_pretty(err));
182 error_free(err);
183 return -1;
184 }
fec5c62a 185
3f74b632 186 return ret;
fec5c62a
RB
187}
188
3f74b632
RH
189uint64_t helper_darn64(void)
190{
191 Error *err = NULL;
192 uint64_t ret;
193
194 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
195 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
196 error_get_pretty(err));
197 error_free(err);
198 return -1;
199 }
64654ded 200
3f74b632
RH
201 return ret;
202}
86ba37ed
TM
203
/*
 * Bit permute doubleword: each of the eight bytes of rs is a bit index into
 * rb (numbered with PPC_BIT()).  Result bit n is set when byte n of rs is a
 * valid index (< 64) and the selected bit of rb is set.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t result = 0;

    for (int n = 0; n < 8; n++) {
        int sel = (rs >> (n * 8)) & 0xFF;

        /* Out-of-range selectors contribute a zero bit. */
        if (sel < 64 && (rb & PPC_BIT(sel))) {
            result |= 1 << n;
        }
    }
    return result;
}
219
220#endif
221
fcfda20f
AJ
222target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
223{
224 target_ulong mask = 0xff;
225 target_ulong ra = 0;
226 int i;
227
228 for (i = 0; i < sizeof(target_ulong); i++) {
229 if ((rs & mask) == (rb & mask)) {
230 ra |= mask;
231 }
232 mask <<= 8;
233 }
234 return ra;
235}
236
64654ded 237/* shift right arithmetic helper */
d15f74fb
BS
238target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
239 target_ulong shift)
64654ded
BS
240{
241 int32_t ret;
242
243 if (likely(!(shift & 0x20))) {
244 if (likely((uint32_t)shift != 0)) {
245 shift &= 0x1f;
246 ret = (int32_t)value >> shift;
247 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
af1c259f 248 env->ca32 = env->ca = 0;
64654ded 249 } else {
af1c259f 250 env->ca32 = env->ca = 1;
64654ded
BS
251 }
252 } else {
253 ret = (int32_t)value;
af1c259f 254 env->ca32 = env->ca = 0;
64654ded
BS
255 }
256 } else {
257 ret = (int32_t)value >> 31;
af1c259f 258 env->ca32 = env->ca = (ret != 0);
64654ded
BS
259 }
260 return (target_long)ret;
261}
262
263#if defined(TARGET_PPC64)
d15f74fb
BS
264target_ulong helper_srad(CPUPPCState *env, target_ulong value,
265 target_ulong shift)
64654ded
BS
266{
267 int64_t ret;
268
269 if (likely(!(shift & 0x40))) {
270 if (likely((uint64_t)shift != 0)) {
271 shift &= 0x3f;
272 ret = (int64_t)value >> shift;
4bc02e23 273 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
af1c259f 274 env->ca32 = env->ca = 0;
64654ded 275 } else {
af1c259f 276 env->ca32 = env->ca = 1;
64654ded
BS
277 }
278 } else {
279 ret = (int64_t)value;
af1c259f 280 env->ca32 = env->ca = 0;
64654ded
BS
281 }
282 } else {
283 ret = (int64_t)value >> 63;
af1c259f 284 env->ca32 = env->ca = (ret != 0);
64654ded
BS
285 }
286 return ret;
287}
288#endif
289
290#if defined(TARGET_PPC64)
291target_ulong helper_popcntb(target_ulong val)
292{
79770002 293 /* Note that we don't fold past bytes */
64654ded
BS
294 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
295 0x5555555555555555ULL);
296 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
297 0x3333333333333333ULL);
298 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
299 0x0f0f0f0f0f0f0f0fULL);
300 return val;
301}
302
303target_ulong helper_popcntw(target_ulong val)
304{
79770002 305 /* Note that we don't fold past words. */
64654ded
BS
306 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
307 0x5555555555555555ULL);
308 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
309 0x3333333333333333ULL);
310 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
311 0x0f0f0f0f0f0f0f0fULL);
312 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
313 0x00ff00ff00ff00ffULL);
314 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
315 0x0000ffff0000ffffULL);
316 return val;
317}
64654ded
BS
318#else
319target_ulong helper_popcntb(target_ulong val)
320{
79770002 321 /* Note that we don't fold past bytes */
64654ded
BS
322 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
323 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
324 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
325 return val;
326}
64654ded
BS
327#endif
328
/*
 * Centrifuge doubleword: gather the bits of @src selected by the 0 bits of
 * @mask on the left and the bits selected by the 1 bits of @mask on the
 * right, keeping their relative order.
 */
uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    /* All working values are 64 bits wide regardless of the target width. */
    uint64_t m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.  The mask is built unsigned because n may reach 63.
         */
        m = (1ULL << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
390
/*
 * Parallel bits deposit doubleword: successive low-order bits of @src are
 * placed at the positions of the set bits of @mask, lowest set bit first.
 * Result bits outside @mask are zero.
 */
uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    uint64_t out = 0;
    int src_bit = 0;

    if (mask == -1) {
        return src;
    }

    while (mask != 0) {
        int dst_bit = ctz64(mask);

        mask &= mask - 1; /* drop the lowest set bit */
        out |= ((src >> src_bit) & 1) << dst_bit;
        src_bit++;
    }

    return out;
}
8bdb7606
MF
408
/*
 * Parallel bits extract doubleword: the bits of @src at the positions of the
 * set bits of @mask are packed into the low-order bits of the result, lowest
 * set bit first.
 */
uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    uint64_t out = 0;
    int dst_bit = 0;

    if (mask == -1) {
        return src;
    }

    while (mask != 0) {
        int src_bit = ctz64(mask);

        mask &= mask - 1; /* drop the lowest set bit */
        out |= ((src >> src_bit) & 1) << dst_bit;
        dst_bit++;
    }

    return out;
}
21ba6e58 426
/*****************************************************************************/
/* Altivec extension helpers */

/*
 * Loop header that visits every index of the array r->field: ascending on
 * big-endian hosts, descending on little-endian hosts.  A variable named 'r'
 * must be in scope, and 'idx' must be a signed integer because the
 * descending form terminates on idx >= 0.
 */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(idx, field)                                \
    for (idx = 0; idx < ARRAY_SIZE(r->field); idx++)
#else
#define VECTOR_FOR_INORDER_I(idx, field)                                \
    for (idx = ARRAY_SIZE(r->field) - 1; idx >= 0; idx--)
#endif
436
/*
 * Saturating arithmetic helpers.
 *
 * SATCVT(from, to, ...) defines cvt<from><to>(), which clamps a value of
 * from_type into [min, max] and returns it as to_type.  SATCVTU does the
 * same for unsigned sources, where only the upper bound can be exceeded.
 * *sat is set to 1 whenever clamping happened and is left untouched
 * otherwise.
 */
#define SATCVT(from, to, from_type, to_type, min, max)                  \
    static inline to_type cvt##from##to(from_type val, int *sat)        \
    {                                                                   \
        to_type out;                                                    \
                                                                        \
        if (val < (from_type)min) {                                     \
            out = min;                                                  \
            *sat = 1;                                                   \
        } else if (val > (from_type)max) {                              \
            out = max;                                                  \
            *sat = 1;                                                   \
        } else {                                                        \
            out = val;                                                  \
        }                                                               \
        return out;                                                     \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)                 \
    static inline to_type cvt##from##to(from_type val, int *sat)        \
    {                                                                   \
        to_type out;                                                    \
                                                                        \
        if (val > (from_type)max) {                                     \
            out = max;                                                  \
            *sat = 1;                                                   \
        } else {                                                        \
            out = val;                                                  \
        }                                                               \
        return out;                                                     \
    }
/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
479
dedfaac7 480void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
64654ded 481{
c19940db 482 ppc_store_vscr(env, vscr);
64654ded
BS
483}
484
cc2b90d7
RH
485uint32_t helper_mfvscr(CPUPPCState *env)
486{
c19940db 487 return ppc_get_vscr(env);
cc2b90d7
RH
488}
489
6175f5a0
RH
490static inline void set_vscr_sat(CPUPPCState *env)
491{
9b5b74da
RH
492 /* The choice of non-zero value is arbitrary. */
493 env->vscr_sat.u32[0] = 1;
6175f5a0
RH
494}
495
5c69452c 496/* vprtybq */
d57fbd8f 497void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
5c69452c
AK
498{
499 uint64_t res = b->u64[0] ^ b->u64[1];
500 res ^= res >> 32;
501 res ^= res >> 16;
502 res ^= res >> 8;
3c385a93
MCA
503 r->VsrD(1) = res & 1;
504 r->VsrD(0) = 0;
5c69452c
AK
505}
506
64654ded 507#define VARITHFP(suffix, func) \
d15f74fb
BS
508 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
509 ppc_avr_t *b) \
64654ded
BS
510 { \
511 int i; \
512 \
05ee3e8a
MCA
513 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
514 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
64654ded
BS
515 } \
516 }
517VARITHFP(addfp, float32_add)
518VARITHFP(subfp, float32_sub)
db1babb8
AJ
519VARITHFP(minfp, float32_min)
520VARITHFP(maxfp, float32_max)
64654ded
BS
521#undef VARITHFP
522
2f93c23f
AJ
523#define VARITHFPFMA(suffix, type) \
524 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
525 ppc_avr_t *b, ppc_avr_t *c) \
526 { \
527 int i; \
05ee3e8a
MCA
528 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
529 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
530 type, &env->vec_status); \
2f93c23f
AJ
531 } \
532 }
533VARITHFPFMA(maddfp, 0);
534VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
535#undef VARITHFPFMA
536
64654ded
BS
537#define VARITHSAT_CASE(type, op, cvt, element) \
538 { \
539 type result = (type)a->element[i] op (type)b->element[i]; \
540 r->element[i] = cvt(result, &sat); \
541 }
542
543#define VARITHSAT_DO(name, op, optype, cvt, element) \
fb11ae7d
RH
544 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
545 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
64654ded
BS
546 { \
547 int sat = 0; \
548 int i; \
549 \
550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
fb11ae7d 551 VARITHSAT_CASE(optype, op, cvt, element); \
64654ded
BS
552 } \
553 if (sat) { \
fb11ae7d 554 vscr_sat->u32[0] = 1; \
64654ded
BS
555 } \
556 }
557#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
558 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
559 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
560#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
561 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
562 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
563VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
564VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
565VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
566VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
567VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
568VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
569#undef VARITHSAT_CASE
570#undef VARITHSAT_DO
571#undef VARITHSAT_SIGNED
572#undef VARITHSAT_UNSIGNED
573
c85929b2
LMC
574#define VAVG(name, element, etype) \
575 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
576 { \
577 int i; \
578 \
579 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
580 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
581 r->element[i] = x >> 1; \
582 } \
64654ded
BS
583 }
584
c85929b2
LMC
585VAVG(VAVGSB, s8, int16_t)
586VAVG(VAVGUB, u8, uint16_t)
587VAVG(VAVGSH, s16, int32_t)
588VAVG(VAVGUH, u16, uint32_t)
589VAVG(VAVGSW, s32, int64_t)
590VAVG(VAVGUW, u32, uint64_t)
64654ded
BS
591#undef VAVG
592
26c964f8
LMC
593#define VABSDU(name, element) \
594void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
37707059
SD
595{ \
596 int i; \
597 \
598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
599 r->element[i] = (a->element[i] > b->element[i]) ? \
600 (a->element[i] - b->element[i]) : \
601 (b->element[i] - a->element[i]); \
602 } \
603}
604
b6cb41b2
DG
605/*
606 * VABSDU - Vector absolute difference unsigned
37707059
SD
607 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
608 * element - element type to access from vector
609 */
26c964f8
LMC
610VABSDU(VABSDUB, u8)
611VABSDU(VABSDUH, u16)
612VABSDU(VABSDUW, u32)
37707059
SD
613#undef VABSDU
614
64654ded 615#define VCF(suffix, cvt, element) \
d15f74fb
BS
616 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
617 ppc_avr_t *b, uint32_t uim) \
64654ded
BS
618 { \
619 int i; \
620 \
05ee3e8a 621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
64654ded 622 float32 t = cvt(b->element[i], &env->vec_status); \
05ee3e8a 623 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
64654ded
BS
624 } \
625 }
626VCF(ux, uint32_to_float32, u32)
627VCF(sx, int32_to_float32, s32)
628#undef VCF
629
eb936dc0
MF
630#define VCMPNEZ(NAME, ELEM) \
631void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
632{ \
633 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
634 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
635 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
636 } \
f7cc8466 637}
eb936dc0
MF
638VCMPNEZ(VCMPNEZB, u8)
639VCMPNEZ(VCMPNEZH, u16)
640VCMPNEZ(VCMPNEZW, u32)
641#undef VCMPNEZ
f7cc8466 642
64654ded 643#define VCMPFP_DO(suffix, compare, order, record) \
d15f74fb
BS
644 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
645 ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
646 { \
647 uint32_t ones = (uint32_t)-1; \
648 uint32_t all = ones; \
649 uint32_t none = 0; \
650 int i; \
651 \
05ee3e8a 652 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
64654ded 653 uint32_t result; \
71bfd65c
RH
654 FloatRelation rel = \
655 float32_compare_quiet(a->f32[i], b->f32[i], \
656 &env->vec_status); \
64654ded
BS
657 if (rel == float_relation_unordered) { \
658 result = 0; \
659 } else if (rel compare order) { \
660 result = ones; \
661 } else { \
662 result = 0; \
663 } \
664 r->u32[i] = result; \
665 all &= result; \
666 none |= result; \
667 } \
668 if (record) { \
669 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
670 } \
671 }
672#define VCMPFP(suffix, compare, order) \
673 VCMPFP_DO(suffix, compare, order, 0) \
674 VCMPFP_DO(suffix##_dot, compare, order, 1)
675VCMPFP(eqfp, ==, float_relation_equal)
676VCMPFP(gefp, !=, float_relation_less)
677VCMPFP(gtfp, ==, float_relation_greater)
678#undef VCMPFP_DO
679#undef VCMPFP
680
d15f74fb
BS
681static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
682 ppc_avr_t *a, ppc_avr_t *b, int record)
64654ded
BS
683{
684 int i;
685 int all_in = 0;
686
05ee3e8a 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
71bfd65c
RH
688 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
689 &env->vec_status);
64654ded
BS
690 if (le_rel == float_relation_unordered) {
691 r->u32[i] = 0xc0000000;
4007b8de 692 all_in = 1;
64654ded 693 } else {
05ee3e8a 694 float32 bneg = float32_chs(b->f32[i]);
71bfd65c
RH
695 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
696 &env->vec_status);
64654ded
BS
697 int le = le_rel != float_relation_greater;
698 int ge = ge_rel != float_relation_less;
699
700 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
701 all_in |= (!le | !ge);
702 }
703 }
704 if (record) {
705 env->crf[6] = (all_in == 0) << 1;
706 }
707}
708
d15f74fb 709void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded 710{
d15f74fb 711 vcmpbfp_internal(env, r, a, b, 0);
64654ded
BS
712}
713
d15f74fb
BS
714void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
715 ppc_avr_t *b)
64654ded 716{
d15f74fb 717 vcmpbfp_internal(env, r, a, b, 1);
64654ded
BS
718}
719
720#define VCT(suffix, satcvt, element) \
d15f74fb
BS
721 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
722 ppc_avr_t *b, uint32_t uim) \
64654ded
BS
723 { \
724 int i; \
725 int sat = 0; \
726 float_status s = env->vec_status; \
727 \
728 set_float_rounding_mode(float_round_to_zero, &s); \
05ee3e8a
MCA
729 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
730 if (float32_is_any_nan(b->f32[i])) { \
64654ded
BS
731 r->element[i] = 0; \
732 } else { \
05ee3e8a 733 float64 t = float32_to_float64(b->f32[i], &s); \
64654ded
BS
734 int64_t j; \
735 \
736 t = float64_scalbn(t, uim, &s); \
737 j = float64_to_int64(t, &s); \
738 r->element[i] = satcvt(j, &sat); \
739 } \
740 } \
741 if (sat) { \
6175f5a0 742 set_vscr_sat(env); \
64654ded
BS
743 } \
744 }
745VCT(uxs, cvtsduw, u32)
746VCT(sxs, cvtsdsw, s32)
747#undef VCT
748
/* Partial-sum callback: (a word, b word, product mask) -> partial sum. */
typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);

/*
 * Sum of products of the eight signed 4-bit fields of a and b; field i is
 * included only when bit i of mask is set.
 */
static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t total = 0;

    for (int k = 0; k < 8; k++) {
        if (mask & (1u << k)) {
            total += (int64_t)sextract32(a, 4 * k, 4) *
                     sextract32(b, 4 * k, 4);
        }
    }
    return total;
}
761
/*
 * Sum of products of the four 8-bit fields of a (signed) and b (unsigned);
 * field i is included only when bit i of mask is set.
 */
static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t total = 0;

    for (int k = 0; k < 4; k++) {
        if (mask & (1u << k)) {
            total += sextract32(a, 8 * k, 8) *
                     (int64_t)extract32(b, 8 * k, 8);
        }
    }
    return total;
}
772
/*
 * Sum of products of the two signed 16-bit fields of a and b; field i is
 * included only when bit i of mask is set.
 */
static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t total = 0;

    for (int k = 0; k < 2; k++) {
        if (mask & (1u << k)) {
            total += (int64_t)sextract32(a, 16 * k, 16) *
                     sextract32(b, 16 * k, 16);
        }
    }
    return total;
}
784
785static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
786 uint32_t mask, bool sat, bool acc, do_ger ger)
787{
788 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
789 xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
790 ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
791 uint8_t xmsk_bit, ymsk_bit;
792 int64_t psum;
793 int i, j;
794 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
795 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
796 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
797 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
798 if (acc) {
799 psum += at[i].VsrSW(j);
800 }
801 if (sat && psum > INT32_MAX) {
802 set_vscr_sat(env);
803 at[i].VsrSW(j) = INT32_MAX;
804 } else if (sat && psum < INT32_MIN) {
805 set_vscr_sat(env);
806 at[i].VsrSW(j) = INT32_MIN;
807 } else {
808 at[i].VsrSW(j) = (int32_t) psum;
809 }
810 } else {
811 at[i].VsrSW(j) = 0;
812 }
813 }
814 }
815}
816
817QEMU_FLATTEN
818void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
819 ppc_acc_t *at, uint32_t mask)
820{
821 xviger(env, a, b, at, mask, false, false, ger_rank8);
822}
823
824QEMU_FLATTEN
825void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
826 ppc_acc_t *at, uint32_t mask)
827{
828 xviger(env, a, b, at, mask, false, true, ger_rank8);
829}
830
831QEMU_FLATTEN
832void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
833 ppc_acc_t *at, uint32_t mask)
834{
835 xviger(env, a, b, at, mask, false, false, ger_rank4);
836}
837
838QEMU_FLATTEN
839void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
840 ppc_acc_t *at, uint32_t mask)
841{
842 xviger(env, a, b, at, mask, false, true, ger_rank4);
843}
844
845QEMU_FLATTEN
846void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
847 ppc_acc_t *at, uint32_t mask)
848{
849 xviger(env, a, b, at, mask, true, true, ger_rank4);
850}
851
852QEMU_FLATTEN
853void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
854 ppc_acc_t *at, uint32_t mask)
855{
856 xviger(env, a, b, at, mask, false, false, ger_rank2);
857}
858
859QEMU_FLATTEN
860void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
861 ppc_acc_t *at, uint32_t mask)
862{
863 xviger(env, a, b, at, mask, true, false, ger_rank2);
864}
865
866QEMU_FLATTEN
867void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
868 ppc_acc_t *at, uint32_t mask)
869{
870 xviger(env, a, b, at, mask, false, true, ger_rank2);
871}
872
873QEMU_FLATTEN
874void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
875 ppc_acc_t *at, uint32_t mask)
876{
877 xviger(env, a, b, at, mask, true, true, ger_rank2);
878}
879
4879538c
RS
880target_ulong helper_vclzlsbb(ppc_avr_t *r)
881{
882 target_ulong count = 0;
883 int i;
60594fea
MCA
884 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
885 if (r->VsrB(i) & 0x01) {
4879538c
RS
886 break;
887 }
888 count++;
889 }
890 return count;
891}
892
893target_ulong helper_vctzlsbb(ppc_avr_t *r)
894{
895 target_ulong count = 0;
896 int i;
4879538c 897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
60594fea 898 if (r->VsrB(i) & 0x01) {
4879538c
RS
899 break;
900 }
901 count++;
902 }
903 return count;
904}
905
306e4753 906void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
d15f74fb 907 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
908{
909 int sat = 0;
910 int i;
911
912 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
913 int32_t prod = a->s16[i] * b->s16[i];
914 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
915
916 r->s16[i] = cvtswsh(t, &sat);
917 }
918
919 if (sat) {
6175f5a0 920 set_vscr_sat(env);
64654ded
BS
921 }
922}
923
306e4753 924void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
d15f74fb 925 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
926{
927 int sat = 0;
928 int i;
929
930 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
931 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
932 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
933 r->s16[i] = cvtswsh(t, &sat);
934 }
935
936 if (sat) {
6175f5a0 937 set_vscr_sat(env);
64654ded
BS
938 }
939}
940
dc46167a
LMC
941void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
942 uint32_t v)
64654ded
BS
943{
944 int i;
945
946 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
947 int32_t prod = a->s16[i] * b->s16[i];
948 r->s16[i] = (int16_t) (prod + c->s16[i]);
949 }
950}
951
d81c2040
MCA
952#define VMRG_DO(name, element, access, ofs) \
953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
954 { \
955 ppc_avr_t result; \
956 int i, half = ARRAY_SIZE(r->element) / 2; \
957 \
958 for (i = 0; i < half; i++) { \
959 result.access(i * 2 + 0) = a->access(i + ofs); \
960 result.access(i * 2 + 1) = b->access(i + ofs); \
961 } \
962 *r = result; \
963 }
964
965#define VMRG(suffix, element, access) \
966 VMRG_DO(mrgl##suffix, element, access, half) \
967 VMRG_DO(mrgh##suffix, element, access, 0)
968VMRG(b, u8, VsrB)
969VMRG(h, u16, VsrH)
970VMRG(w, u32, VsrW)
64654ded
BS
971#undef VMRG_DO
972#undef VMRG
64654ded 973
b2dc03a5 974void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
975{
976 int32_t prod[16];
977 int i;
978
979 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
980 prod[i] = (int32_t)a->s8[i] * b->u8[i];
981 }
982
983 VECTOR_FOR_INORDER_I(i, s32) {
984 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
985 prod[4 * i + 2] + prod[4 * i + 3];
986 }
987}
988
6f52f731 989void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
990{
991 int32_t prod[8];
992 int i;
993
994 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
995 prod[i] = a->s16[i] * b->s16[i];
996 }
997
998 VECTOR_FOR_INORDER_I(i, s32) {
999 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1000 }
1001}
1002
6f52f731 1003void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
d15f74fb 1004 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1005{
1006 int32_t prod[8];
1007 int i;
1008 int sat = 0;
1009
1010 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1011 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1012 }
1013
1014 VECTOR_FOR_INORDER_I(i, s32) {
1015 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1016
1017 r->u32[i] = cvtsdsw(t, &sat);
1018 }
1019
1020 if (sat) {
6175f5a0 1021 set_vscr_sat(env);
64654ded
BS
1022 }
1023}
1024
b2dc03a5 1025void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1026{
1027 uint16_t prod[16];
1028 int i;
1029
1030 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1031 prod[i] = a->u8[i] * b->u8[i];
1032 }
1033
1034 VECTOR_FOR_INORDER_I(i, u32) {
1035 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1036 prod[4 * i + 2] + prod[4 * i + 3];
1037 }
1038}
1039
89a5a1ae 1040void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1041{
1042 uint32_t prod[8];
1043 int i;
1044
1045 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1046 prod[i] = a->u16[i] * b->u16[i];
1047 }
1048
1049 VECTOR_FOR_INORDER_I(i, u32) {
1050 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1051 }
1052}
1053
89a5a1ae 1054void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
d15f74fb 1055 ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1056{
1057 uint32_t prod[8];
1058 int i;
1059 int sat = 0;
1060
1061 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1062 prod[i] = a->u16[i] * b->u16[i];
1063 }
1064
1065 VECTOR_FOR_INORDER_I(i, s32) {
1066 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1067
1068 r->u32[i] = cvtuduw(t, &sat);
1069 }
1070
1071 if (sat) {
6175f5a0 1072 set_vscr_sat(env);
64654ded
BS
1073 }
1074}
1075
4fbc89ed 1076#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
80eca687 1077 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
1078 { \
1079 int i; \
1080 \
4fbc89ed
MCA
1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1082 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1083 (cast)b->mul_access(i); \
1084 } \
1085 }
1086
1087#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
80eca687 1088 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
4fbc89ed
MCA
1089 { \
1090 int i; \
1091 \
1092 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1093 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1094 (cast)b->mul_access(i + 1); \
64654ded
BS
1095 } \
1096 }
4fbc89ed
MCA
1097
1098#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
80eca687
LMC
1099 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1100 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1101VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1102VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1103VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1104VMUL(UB, u8, VsrB, VsrH, uint16_t)
1105VMUL(UH, u16, VsrH, VsrW, uint32_t)
1106VMUL(UW, u32, VsrW, VsrD, uint64_t)
4fbc89ed
MCA
1107#undef VMUL_DO_EVN
1108#undef VMUL_DO_ODD
64654ded
BS
1109#undef VMUL
1110
41c2877f
MF
1111void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1112 target_ulong uim)
1113{
1114 int i, idx;
1115 ppc_vsr_t tmp = { .u64 = {0, 0} };
1116
1117 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1118 if ((pcv->VsrB(i) >> 5) == uim) {
1119 idx = pcv->VsrB(i) & 0x1f;
1120 if (idx < ARRAY_SIZE(t->u8)) {
1121 tmp.VsrB(i) = s0->VsrB(idx);
1122 } else {
1123 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1124 }
1125 }
1126 }
1127
1128 *t = tmp;
1129}
1130
1700f2bf
LMC
1131void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1132{
1133 Int128 neg1 = int128_makes64(-1);
1134 Int128 int128_min = int128_make128(0, INT64_MIN);
1135 if (likely(int128_nz(b->s128) &&
1136 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1137 t->s128 = int128_divs(a->s128, b->s128);
1138 } else {
1139 t->s128 = a->s128; /* Undefined behavior */
1140 }
1141}
1142
1143void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1144{
1145 if (int128_nz(b->s128)) {
1146 t->s128 = int128_divu(a->s128, b->s128);
1147 } else {
1148 t->s128 = a->s128; /* Undefined behavior */
1149 }
1150}
1151
a173ba88
LMC
1152void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1153{
1154 int i;
1155 int64_t high;
1156 uint64_t low;
1157 for (i = 0; i < 2; i++) {
1158 high = a->s64[i];
1159 low = 0;
1160 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1161 t->s64[i] = a->s64[i]; /* Undefined behavior */
1162 } else {
1163 divs128(&low, &high, b->s64[i]);
1164 t->s64[i] = low;
1165 }
1166 }
1167}
1168
1169void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1170{
1171 int i;
1172 uint64_t high, low;
1173 for (i = 0; i < 2; i++) {
1174 high = a->u64[i];
1175 low = 0;
1176 if (unlikely(!b->u64[i])) {
1177 t->u64[i] = a->u64[i]; /* Undefined behavior */
1178 } else {
1179 divu128(&low, &high, b->u64[i]);
1180 t->u64[i] = low;
1181 }
1182 }
1183}
1184
1185void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1186{
1187 Int128 high, low;
1188 Int128 int128_min = int128_make128(0, INT64_MIN);
1189 Int128 neg1 = int128_makes64(-1);
1190
1191 high = a->s128;
1192 low = int128_zero();
1193 if (unlikely(!int128_nz(b->s128) ||
1194 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1195 t->s128 = a->s128; /* Undefined behavior */
1196 } else {
1197 divs256(&low, &high, b->s128);
1198 t->s128 = low;
1199 }
1200}
1201
1202void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1203{
1204 Int128 high, low;
1205
1206 high = a->s128;
1207 low = int128_zero();
1208 if (unlikely(!int128_nz(b->s128))) {
1209 t->s128 = a->s128; /* Undefined behavior */
1210 } else {
1211 divu256(&low, &high, b->s128);
1212 t->s128 = low;
1213 }
1214}
1215
b80bec3a
LMC
1216void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1217{
1218 Int128 neg1 = int128_makes64(-1);
1219 Int128 int128_min = int128_make128(0, INT64_MIN);
1220 if (likely(int128_nz(b->s128) &&
1221 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1222 t->s128 = int128_rems(a->s128, b->s128);
1223 } else {
1224 t->s128 = int128_zero(); /* Undefined behavior */
1225 }
1226}
1227
1228void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1229{
1230 if (likely(int128_nz(b->s128))) {
1231 t->s128 = int128_remu(a->s128, b->s128);
1232 } else {
1233 t->s128 = int128_zero(); /* Undefined behavior */
1234 }
1235}
1236
28347fe2 1237void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
64654ded
BS
1238{
1239 ppc_avr_t result;
1240 int i;
1241
60594fea
MCA
1242 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1243 int s = c->VsrB(i) & 0x1f;
64654ded 1244 int index = s & 0xf;
64654ded
BS
1245
1246 if (s & 0x10) {
60594fea 1247 result.VsrB(i) = b->VsrB(index);
64654ded 1248 } else {
60594fea 1249 result.VsrB(i) = a->VsrB(index);
64654ded
BS
1250 }
1251 }
1252 *r = result;
1253}
1254
28347fe2 1255void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
ab045436
RS
1256{
1257 ppc_avr_t result;
1258 int i;
1259
60594fea
MCA
1260 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1261 int s = c->VsrB(i) & 0x1f;
ab045436 1262 int index = 15 - (s & 0xf);
ab045436
RS
1263
1264 if (s & 0x10) {
60594fea 1265 result.VsrB(i) = a->VsrB(index);
ab045436 1266 } else {
60594fea 1267 result.VsrB(i) = b->VsrB(index);
ab045436
RS
1268 }
1269 }
1270 *r = result;
1271}
1272
618574dd 1273#define XXGENPCV_BE_EXP(NAME, SZ) \
b090f4f1
MF
1274void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1275{ \
1276 ppc_vsr_t tmp; \
1277 \
1278 /* Initialize tmp with the result of an all-zeros mask */ \
1279 tmp.VsrD(0) = 0x1011121314151617; \
1280 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1281 \
1282 /* Iterate over the most significant byte of each element */ \
1283 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1284 if (b->VsrB(i) & 0x80) { \
1285 /* Update each byte of the element */ \
1286 for (int k = 0; k < SZ; k++) { \
1287 tmp.VsrB(i + k) = j + k; \
1288 } \
1289 j += SZ; \
1290 } \
1291 } \
1292 \
1293 *t = tmp; \
618574dd
MF
1294}
1295
1296#define XXGENPCV_BE_COMP(NAME, SZ) \
b090f4f1
MF
1297void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1298{ \
1299 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1300 \
1301 /* Iterate over the most significant byte of each element */ \
1302 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1303 if (b->VsrB(i) & 0x80) { \
1304 /* Update each byte of the element */ \
1305 for (int k = 0; k < SZ; k++) { \
1306 tmp.VsrB(j + k) = i + k; \
1307 } \
1308 j += SZ; \
1309 } \
1310 } \
1311 \
1312 *t = tmp; \
618574dd
MF
1313}
1314
1315#define XXGENPCV_LE_EXP(NAME, SZ) \
b090f4f1
MF
1316void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1317{ \
1318 ppc_vsr_t tmp; \
1319 \
1320 /* Initialize tmp with the result of an all-zeros mask */ \
1321 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1322 tmp.VsrD(1) = 0x1716151413121110; \
1323 \
1324 /* Iterate over the most significant byte of each element */ \
1325 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1326 /* Reverse indexing of "i" */ \
1327 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1328 if (b->VsrB(idx) & 0x80) { \
1329 /* Update each byte of the element */ \
1330 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1331 tmp.VsrB(idx + rk) = j + k; \
1332 } \
1333 j += SZ; \
1334 } \
1335 } \
1336 \
1337 *t = tmp; \
618574dd
MF
1338}
1339
1340#define XXGENPCV_LE_COMP(NAME, SZ) \
b090f4f1
MF
1341void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1342{ \
1343 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1344 \
1345 /* Iterate over the most significant byte of each element */ \
1346 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1347 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1348 /* Update each byte of the element */ \
1349 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1350 /* Reverse indexing of "j" */ \
1351 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1352 tmp.VsrB(idx + rk) = i + k; \
1353 } \
1354 j += SZ; \
1355 } \
1356 } \
1357 \
1358 *t = tmp; \
1359}
1360
618574dd
MF
1361#define XXGENPCV(NAME, SZ) \
1362 XXGENPCV_BE_EXP(NAME, SZ) \
1363 XXGENPCV_BE_COMP(NAME, SZ) \
1364 XXGENPCV_LE_EXP(NAME, SZ) \
1365 XXGENPCV_LE_COMP(NAME, SZ) \
1366
b090f4f1
MF
1367XXGENPCV(XXGENPCVBM, 1)
1368XXGENPCV(XXGENPCVHM, 2)
1369XXGENPCV(XXGENPCVWM, 4)
1370XXGENPCV(XXGENPCVDM, 8)
618574dd
MF
1371
1372#undef XXGENPCV_BE_EXP
1373#undef XXGENPCV_BE_COMP
1374#undef XXGENPCV_LE_EXP
1375#undef XXGENPCV_LE_COMP
b090f4f1
MF
1376#undef XXGENPCV
1377
/*
 * Host-endianness helpers for vbpermd/vbpermq.
 *
 * VBPERMQ_INDEX(avr, i): byte i of avr, with i counted the same way on
 *                        both host byte orders (u8[i] on big-endian hosts,
 *                        u8[15 - i] on little-endian hosts).
 * VBPERMD_INDEX(i):      host u64[] slot for doubleword i.
 * VBPERMQ_DW(index):     host u64[] slot selected by bit 0x40 of index.
 * EXTRACT_BIT(avr, i, index): single bit of doubleword i, at position
 *                        63 - index as passed to extract64().
 *
 * Every macro argument is parenthesized so that expression arguments
 * expand correctly.
 */
#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - (i))
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->VsrD(i), 63 - (index), 1))
4d82038e 1389
01fe9a47
RS
1390void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1391{
1392 int i, j;
1393 ppc_avr_t result = { .u64 = { 0, 0 } };
1394 VECTOR_FOR_INORDER_I(i, u64) {
1395 for (j = 0; j < 8; j++) {
1396 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1397 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1398 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1399 }
1400 }
1401 }
1402 *r = result;
1403}
1404
4d82038e
TM
1405void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1406{
1407 int i;
1408 uint64_t perm = 0;
1409
1410 VECTOR_FOR_INORDER_I(i, u8) {
1411 int index = VBPERMQ_INDEX(b, i);
1412
1413 if (index < 128) {
b6cb41b2 1414 uint64_t mask = (1ull << (63 - (index & 0x3F)));
4d82038e
TM
1415 if (a->u64[VBPERMQ_DW(index)] & mask) {
1416 perm |= (0x8000 >> i);
1417 }
1418 }
1419 }
1420
3c385a93
MCA
1421 r->VsrD(0) = perm;
1422 r->VsrD(1) = 0;
4d82038e
TM
1423}
1424
1425#undef VBPERMQ_INDEX
1426#undef VBPERMQ_DW
1427
cec4090d
RH
1428/*
1429 * There is no carry across the two doublewords, so their order does
1430 * not matter. Nor is there partial overlap between registers.
1431 */
1432void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1433{
1434 for (int i = 0; i < 2; ++i) {
1435 uint64_t aa = a->u64[i], bb = b->u64[i];
1436 r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb);
1437 }
1438}
1439
a2c67342
RH
1440void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1441{
1442 for (int i = 0; i < 2; ++i) {
1443 uint64_t aa = a->u64[i], bb = b->u64[i];
1444 r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb);
1445 }
1446}
1447
f56d3c1a
RH
1448void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1449{
1450 for (int i = 0; i < 2; ++i) {
1451 uint64_t aa = a->u64[i], bb = b->u64[i];
1452 r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32);
1453 }
1454}
b8476fc7 1455
e82ca8ac 1456void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
b8476fc7 1457{
7bdbf233
RH
1458 Int128 e = clmul_64(a->u64[0], b->u64[0]);
1459 Int128 o = clmul_64(a->u64[1], b->u64[1]);
1460 r->s128 = int128_xor(e, o);
b8476fc7
TM
1461}
1462
e03b5686 1463#if HOST_BIG_ENDIAN
64654ded
BS
1464#define PKBIG 1
1465#else
1466#define PKBIG 0
1467#endif
1468void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1469{
1470 int i, j;
1471 ppc_avr_t result;
e03b5686 1472#if HOST_BIG_ENDIAN
64654ded
BS
1473 const ppc_avr_t *x[2] = { a, b };
1474#else
1475 const ppc_avr_t *x[2] = { b, a };
1476#endif
1477
1478 VECTOR_FOR_INORDER_I(i, u64) {
1479 VECTOR_FOR_INORDER_I(j, u32) {
1480 uint32_t e = x[i]->u32[j];
1481
b6cb41b2
DG
1482 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1483 ((e >> 6) & 0x3e0) |
1484 ((e >> 3) & 0x1f));
64654ded
BS
1485 }
1486 }
1487 *r = result;
1488}
1489
1490#define VPK(suffix, from, to, cvt, dosat) \
d15f74fb
BS
1491 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1492 ppc_avr_t *a, ppc_avr_t *b) \
64654ded
BS
1493 { \
1494 int i; \
1495 int sat = 0; \
1496 ppc_avr_t result; \
1497 ppc_avr_t *a0 = PKBIG ? a : b; \
1498 ppc_avr_t *a1 = PKBIG ? b : a; \
1499 \
1500 VECTOR_FOR_INORDER_I(i, from) { \
1501 result.to[i] = cvt(a0->from[i], &sat); \
b6cb41b2 1502 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
64654ded
BS
1503 } \
1504 *r = result; \
1505 if (dosat && sat) { \
6175f5a0 1506 set_vscr_sat(env); \
64654ded
BS
1507 } \
1508 }
1509#define I(x, y) (x)
1510VPK(shss, s16, s8, cvtshsb, 1)
1511VPK(shus, s16, u8, cvtshub, 1)
1512VPK(swss, s32, s16, cvtswsh, 1)
1513VPK(swus, s32, u16, cvtswuh, 1)
024215b2
TM
1514VPK(sdss, s64, s32, cvtsdsw, 1)
1515VPK(sdus, s64, u32, cvtsduw, 1)
64654ded
BS
1516VPK(uhus, u16, u8, cvtuhub, 1)
1517VPK(uwus, u32, u16, cvtuwuh, 1)
024215b2 1518VPK(udus, u64, u32, cvtuduw, 1)
64654ded
BS
1519VPK(uhum, u16, u8, I, 0)
1520VPK(uwum, u32, u16, I, 0)
024215b2 1521VPK(udum, u64, u32, I, 0)
64654ded
BS
1522#undef I
1523#undef VPK
1524#undef PKBIG
1525
d15f74fb 1526void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1527{
1528 int i;
1529
05ee3e8a
MCA
1530 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1531 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
64654ded
BS
1532 }
1533}
1534
1535#define VRFI(suffix, rounding) \
d15f74fb
BS
1536 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1537 ppc_avr_t *b) \
64654ded
BS
1538 { \
1539 int i; \
1540 float_status s = env->vec_status; \
1541 \
1542 set_float_rounding_mode(rounding, &s); \
05ee3e8a
MCA
1543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1544 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
64654ded
BS
1545 } \
1546 }
1547VRFI(n, float_round_nearest_even)
1548VRFI(m, float_round_down)
1549VRFI(p, float_round_up)
1550VRFI(z, float_round_to_zero)
1551#undef VRFI
1552
d15f74fb 1553void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1554{
1555 int i;
1556
05ee3e8a
MCA
1557 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1558 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
64654ded 1559
05ee3e8a 1560 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
64654ded
BS
1561 }
1562}
1563
02c74f0e
MF
1564#define VRLMI(name, size, element, insert) \
1565void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1566{ \
1567 int i; \
1568 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1569 uint##size##_t src1 = a->element[i]; \
1570 uint##size##_t src2 = b->element[i]; \
1571 uint##size##_t src3 = r->element[i]; \
1572 uint##size##_t begin, end, shift, mask, rot_val; \
1573 \
1574 shift = extract##size(src2, 0, 6); \
1575 end = extract##size(src2, 8, 6); \
1576 begin = extract##size(src2, 16, 6); \
1577 rot_val = rol##size(src1, shift); \
1578 mask = mask_u##size(begin, end); \
1579 if (insert) { \
1580 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1581 } else { \
1582 r->element[i] = (rot_val & mask); \
1583 } \
1584 } \
3e00884f
GS
1585}
1586
02c74f0e
MF
1587VRLMI(VRLDMI, 64, u64, 1);
1588VRLMI(VRLWMI, 32, u32, 1);
1589VRLMI(VRLDNM, 64, u64, 0);
1590VRLMI(VRLWNM, 32, u32, 0);
3e00884f 1591
d15f74fb 1592void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1593{
1594 int i;
1595
05ee3e8a
MCA
1596 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1597 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
64654ded
BS
1598 }
1599}
1600
d15f74fb 1601void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
64654ded
BS
1602{
1603 int i;
1604
05ee3e8a
MCA
1605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1606 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
64654ded
BS
1607 }
1608}
1609
f297c4c6
MF
1610#define VEXTU_X_DO(name, size, left) \
1611target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1612{ \
1613 int index = (a & 0xf) * 8; \
1614 if (left) { \
1615 index = 128 - index - size; \
1616 } \
1617 return int128_getlo(int128_rshift(b->s128, index)) & \
1618 MAKE_64BIT_MASK(0, size); \
1619}
60caf221
AK
1620VEXTU_X_DO(vextublx, 8, 1)
1621VEXTU_X_DO(vextuhlx, 16, 1)
1622VEXTU_X_DO(vextuwlx, 32, 1)
1623VEXTU_X_DO(vextubrx, 8, 0)
1624VEXTU_X_DO(vextuhrx, 16, 0)
1625VEXTU_X_DO(vextuwrx, 32, 0)
1626#undef VEXTU_X_DO
1627
5644a175
VAS
1628void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1629{
1630 int i;
1631 unsigned int shift, bytes, size;
1632
1633 size = ARRAY_SIZE(r->u8);
1634 for (i = 0; i < size; i++) {
63be02fc
AB
1635 shift = b->VsrB(i) & 0x7; /* extract shift value */
1636 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1637 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1638 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
5644a175
VAS
1639 }
1640}
1641
4004c1db
VAS
1642void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1643{
1644 int i;
1645 unsigned int shift, bytes;
1646
b6cb41b2
DG
1647 /*
1648 * Use reverse order, as destination and source register can be
1649 * same. Its being modified in place saving temporary, reverse
1650 * order will guarantee that computed result is not fed back.
4004c1db
VAS
1651 */
1652 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
63be02fc
AB
1653 shift = b->VsrB(i) & 0x7; /* extract shift value */
1654 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
4004c1db 1655 /* extract adjacent bytes */
63be02fc 1656 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
4004c1db
VAS
1657 }
1658}
1659
64654ded
BS
1660void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1661{
1662 int sh = shift & 0xf;
1663 int i;
1664 ppc_avr_t result;
1665
64654ded
BS
1666 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1667 int index = sh + i;
1668 if (index > 0xf) {
60594fea 1669 result.VsrB(i) = b->VsrB(index - 0x10);
64654ded 1670 } else {
60594fea 1671 result.VsrB(i) = a->VsrB(index);
64654ded
BS
1672 }
1673 }
64654ded
BS
1674 *r = result;
1675}
1676
1677void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1678{
3c385a93 1679 int sh = (b->VsrB(0xf) >> 3) & 0xf;
64654ded 1680
e03b5686 1681#if HOST_BIG_ENDIAN
64654ded 1682 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
b6cb41b2 1683 memset(&r->u8[16 - sh], 0, sh);
64654ded
BS
1684#else
1685 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1686 memset(&r->u8[0], 0, sh);
1687#endif
1688}
1689
e03b5686 1690#if HOST_BIG_ENDIAN
2cc12af3
MF
1691#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1692#else
1693#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1694#endif
1695
1696#define VINSX(SUFFIX, TYPE) \
1697void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1698 uint64_t val, target_ulong index) \
1699{ \
1700 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1701 target_long idx = index; \
1702 \
1703 if (idx < 0 || idx > maxidx) { \
1704 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1705 qemu_log_mask(LOG_GUEST_ERROR, \
1706 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1707 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1708 } else { \
1709 TYPE src = val; \
1710 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1711 } \
1712}
1713VINSX(B, uint8_t)
1714VINSX(H, uint16_t)
1715VINSX(W, uint32_t)
1716VINSX(D, uint64_t)
1717#undef ELEM_ADDR
1718#undef VINSX
e03b5686 1719#if HOST_BIG_ENDIAN
28110b72
MF
1720#define VEXTDVLX(NAME, SIZE) \
1721void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1722 target_ulong index) \
1723{ \
1724 const target_long idx = index; \
1725 ppc_avr_t tmp[2] = { *a, *b }; \
1726 memset(t, 0, sizeof(*t)); \
1727 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1728 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1729 } else { \
1730 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1731 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1732 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1733 } \
1734}
1735#else
1736#define VEXTDVLX(NAME, SIZE) \
1737void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1738 target_ulong index) \
1739{ \
1740 const target_long idx = index; \
1741 ppc_avr_t tmp[2] = { *b, *a }; \
1742 memset(t, 0, sizeof(*t)); \
1743 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1744 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1745 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1746 } else { \
1747 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1748 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1749 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1750 } \
1751}
1752#endif
1753VEXTDVLX(VEXTDUBVLX, 1)
1754VEXTDVLX(VEXTDUHVLX, 2)
1755VEXTDVLX(VEXTDUWVLX, 4)
1756VEXTDVLX(VEXTDDVLX, 8)
1757#undef VEXTDVLX
e03b5686 1758#if HOST_BIG_ENDIAN
b5d569a1
RS
1759#define VEXTRACT(suffix, element) \
1760 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1761 { \
1762 uint32_t es = sizeof(r->element[0]); \
1763 memmove(&r->u8[8 - es], &b->u8[index], es); \
1764 memset(&r->u8[8], 0, 8); \
1765 memset(&r->u8[0], 0, 8 - es); \
1766 }
1767#else
1768#define VEXTRACT(suffix, element) \
1769 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1770 { \
1771 uint32_t es = sizeof(r->element[0]); \
1772 uint32_t s = (16 - index) - es; \
1773 memmove(&r->u8[8], &b->u8[s], es); \
1774 memset(&r->u8[0], 0, 8); \
1775 memset(&r->u8[8 + es], 0, 8 - es); \
1776 }
1777#endif
1778VEXTRACT(ub, u8)
1779VEXTRACT(uh, u16)
1780VEXTRACT(uw, u32)
1781VEXTRACT(d, u64)
1782#undef VEXTRACT
64654ded 1783
fb5303cc
MF
1784#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1785uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1786{ \
1787 int i, idx, crf = 0; \
1788 \
1789 for (i = 0; i < NUM_ELEMS; i++) { \
1790 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1791 if (b->Vsr##ELEM(idx)) { \
1792 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1793 } else { \
1794 crf = 0b0010; \
1795 break; \
1796 } \
1797 } \
1798 \
1799 for (; i < NUM_ELEMS; i++) { \
1800 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1801 t->Vsr##ELEM(idx) = 0; \
1802 } \
1803 \
1804 return crf; \
1805}
1806VSTRI(VSTRIBL, B, 16, true)
1807VSTRI(VSTRIBR, B, 16, false)
1808VSTRI(VSTRIHL, H, 8, true)
1809VSTRI(VSTRIHR, H, 8, false)
1810#undef VSTRI
1811
8f5eeee3 1812void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
8ad901e5 1813{
03b32c09 1814 ppc_vsr_t t = { };
8ad901e5
ND
1815 size_t es = sizeof(uint32_t);
1816 uint32_t ext_index;
1817 int i;
1818
8ad901e5
ND
1819 ext_index = index;
1820 for (i = 0; i < es; i++, ext_index++) {
03b32c09 1821 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
8ad901e5 1822 }
8ad901e5 1823
03b32c09 1824 *xt = t;
8ad901e5
ND
1825}
1826
8f5eeee3 1827void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
3398b742 1828{
03b32c09 1829 ppc_vsr_t t = *xt;
3398b742
ND
1830 size_t es = sizeof(uint32_t);
1831 int ins_index, i = 0;
1832
3398b742
ND
1833 ins_index = index;
1834 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
03b32c09 1835 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
3398b742 1836 }
3398b742 1837
03b32c09 1838 *xt = t;
3398b742
ND
1839}
1840
1015fcab
MF
1841void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1842 uint32_t desc)
1843{
1844 /*
1845 * Instead of processing imm bit-by-bit, we'll skip the computation of
1846 * conjunctions whose corresponding bit is unset.
1847 */
1848 int bit, imm = simd_data(desc);
1849 Int128 conj, disj = int128_zero();
1850
1851 /* Iterate over set bits from the least to the most significant bit */
1852 while (imm) {
1853 /*
1854 * Get the next bit to be processed with ctz64. Invert the result of
1855 * ctz64 to match the indexing used by PowerISA.
1856 */
1857 bit = 7 - ctzl(imm);
1858 if (bit & 0x4) {
1859 conj = a->s128;
1860 } else {
1861 conj = int128_not(a->s128);
1862 }
1863 if (bit & 0x2) {
1864 conj = int128_and(conj, b->s128);
1865 } else {
1866 conj = int128_and(conj, int128_not(b->s128));
1867 }
1868 if (bit & 0x1) {
1869 conj = int128_and(conj, c->s128);
1870 } else {
1871 conj = int128_and(conj, int128_not(c->s128));
1872 }
1873 disj = int128_or(disj, conj);
1874
1875 /* Unset the least significant bit that is set */
1876 imm &= imm - 1;
1877 }
1878
1879 t->s128 = disj;
1880}
1881
788c6399
MF
1882#define XXBLEND(name, sz) \
1883void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1884 ppc_avr_t *c, uint32_t desc) \
1885{ \
1886 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1887 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1888 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1889 } \
1890}
1891XXBLEND(B, 8)
1892XXBLEND(H, 16)
1893XXBLEND(W, 32)
1894XXBLEND(D, 64)
1895#undef XXBLEND
1896
64654ded
BS
1897void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1898{
3c385a93 1899 int sh = (b->VsrB(0xf) >> 3) & 0xf;
64654ded 1900
e03b5686 1901#if HOST_BIG_ENDIAN
64654ded
BS
1902 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1903 memset(&r->u8[0], 0, sh);
1904#else
1905 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1906 memset(&r->u8[16 - sh], 0, sh);
1907#endif
1908}
1909
d15f74fb 1910void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1911{
1912 int64_t t;
1913 int i, upper;
1914 ppc_avr_t result;
1915 int sat = 0;
1916
60594fea
MCA
1917 upper = ARRAY_SIZE(r->s32) - 1;
1918 t = (int64_t)b->VsrSW(upper);
64654ded 1919 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
60594fea
MCA
1920 t += a->VsrSW(i);
1921 result.VsrSW(i) = 0;
64654ded 1922 }
60594fea 1923 result.VsrSW(upper) = cvtsdsw(t, &sat);
64654ded
BS
1924 *r = result;
1925
1926 if (sat) {
6175f5a0 1927 set_vscr_sat(env);
64654ded
BS
1928 }
1929}
1930
d15f74fb 1931void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1932{
1933 int i, j, upper;
1934 ppc_avr_t result;
1935 int sat = 0;
1936
64654ded 1937 upper = 1;
64654ded 1938 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
60594fea 1939 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
64654ded 1940
7fa0ddc1 1941 result.VsrD(i) = 0;
64654ded 1942 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
60594fea 1943 t += a->VsrSW(2 * i + j);
64654ded 1944 }
60594fea 1945 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
64654ded
BS
1946 }
1947
1948 *r = result;
1949 if (sat) {
6175f5a0 1950 set_vscr_sat(env);
64654ded
BS
1951 }
1952}
1953
d15f74fb 1954void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1955{
1956 int i, j;
1957 int sat = 0;
1958
1959 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1960 int64_t t = (int64_t)b->s32[i];
1961
1962 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1963 t += a->s8[4 * i + j];
1964 }
1965 r->s32[i] = cvtsdsw(t, &sat);
1966 }
1967
1968 if (sat) {
6175f5a0 1969 set_vscr_sat(env);
64654ded
BS
1970 }
1971}
1972
d15f74fb 1973void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1974{
1975 int sat = 0;
1976 int i;
1977
1978 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1979 int64_t t = (int64_t)b->s32[i];
1980
1981 t += a->s16[2 * i] + a->s16[2 * i + 1];
1982 r->s32[i] = cvtsdsw(t, &sat);
1983 }
1984
1985 if (sat) {
6175f5a0 1986 set_vscr_sat(env);
64654ded
BS
1987 }
1988}
1989
d15f74fb 1990void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1991{
1992 int i, j;
1993 int sat = 0;
1994
1995 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1996 uint64_t t = (uint64_t)b->u32[i];
1997
1998 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1999 t += a->u8[4 * i + j];
2000 }
2001 r->u32[i] = cvtuduw(t, &sat);
2002 }
2003
2004 if (sat) {
6175f5a0 2005 set_vscr_sat(env);
64654ded
BS
2006 }
2007}
2008
e03b5686 2009#if HOST_BIG_ENDIAN
64654ded
BS
2010#define UPKHI 1
2011#define UPKLO 0
2012#else
2013#define UPKHI 0
2014#define UPKLO 1
2015#endif
2016#define VUPKPX(suffix, hi) \
2017 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2018 { \
2019 int i; \
2020 ppc_avr_t result; \
2021 \
2022 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
b6cb41b2 2023 uint16_t e = b->u16[hi ? i : i + 4]; \
64654ded
BS
2024 uint8_t a = (e >> 15) ? 0xff : 0; \
2025 uint8_t r = (e >> 10) & 0x1f; \
2026 uint8_t g = (e >> 5) & 0x1f; \
2027 uint8_t b = e & 0x1f; \
2028 \
2029 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2030 } \
2031 *r = result; \
2032 }
2033VUPKPX(lpx, UPKLO)
2034VUPKPX(hpx, UPKHI)
2035#undef VUPKPX
2036
2037#define VUPK(suffix, unpacked, packee, hi) \
2038 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2039 { \
2040 int i; \
2041 ppc_avr_t result; \
2042 \
2043 if (hi) { \
2044 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2045 result.unpacked[i] = b->packee[i]; \
2046 } \
2047 } else { \
2048 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2049 i++) { \
2050 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2051 } \
2052 } \
2053 *r = result; \
2054 }
2055VUPK(hsb, s16, s8, UPKHI)
2056VUPK(hsh, s32, s16, UPKHI)
4430e076 2057VUPK(hsw, s64, s32, UPKHI)
64654ded
BS
2058VUPK(lsb, s16, s8, UPKLO)
2059VUPK(lsh, s32, s16, UPKLO)
4430e076 2060VUPK(lsw, s64, s32, UPKLO)
64654ded
BS
2061#undef VUPK
2062#undef UPKHI
2063#undef UPKLO
2064
f293f04a
TM
2065#define VGENERIC_DO(name, element) \
2066 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2067 { \
2068 int i; \
2069 \
60594fea 2070 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
f293f04a
TM
2071 r->element[i] = name(b->element[i]); \
2072 } \
2073 }
2074
2075#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2076#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
f293f04a
TM
2077
2078VGENERIC_DO(clzb, u8)
2079VGENERIC_DO(clzh, u16)
f293f04a
TM
2080
2081#undef clzb
2082#undef clzh
f293f04a 2083
a5ad8fbf
RS
2084#define ctzb(v) ((v) ? ctz32(v) : 8)
2085#define ctzh(v) ((v) ? ctz32(v) : 16)
2086#define ctzw(v) ctz32((v))
2087#define ctzd(v) ctz64((v))
2088
2089VGENERIC_DO(ctzb, u8)
2090VGENERIC_DO(ctzh, u16)
2091VGENERIC_DO(ctzw, u32)
2092VGENERIC_DO(ctzd, u64)
2093
2094#undef ctzb
2095#undef ctzh
2096#undef ctzw
2097#undef ctzd
2098
e13500b3
TM
2099#define popcntb(v) ctpop8(v)
2100#define popcnth(v) ctpop16(v)
2101#define popcntw(v) ctpop32(v)
2102#define popcntd(v) ctpop64(v)
2103
2104VGENERIC_DO(popcntb, u8)
2105VGENERIC_DO(popcnth, u16)
2106VGENERIC_DO(popcntw, u32)
2107VGENERIC_DO(popcntd, u64)
2108
2109#undef popcntb
2110#undef popcnth
2111#undef popcntw
2112#undef popcntd
f293f04a
TM
2113
2114#undef VGENERIC_DO
2115
7ca04286 2116void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
b41da4eb 2117{
7ca04286 2118 r->s128 = int128_add(a->s128, b->s128);
b41da4eb
TM
2119}
2120
896d92c8 2121void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
b41da4eb 2122{
896d92c8
MF
2123 r->s128 = int128_add(int128_add(a->s128, b->s128),
2124 int128_make64(int128_getlo(c->s128) & 1));
b41da4eb
TM
2125}
2126
8290ea50 2127void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
b41da4eb 2128{
8290ea50 2129 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
3c385a93 2130 r->VsrD(0) = 0;
b41da4eb
TM
2131}
2132
896d92c8 2133void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
b41da4eb 2134{
896d92c8
MF
2135 bool carry_out = int128_ult(int128_not(a->s128), b->s128),
2136 carry_in = int128_getlo(c->s128) & 1;
b41da4eb
TM
2137
2138 if (!carry_out && carry_in) {
896d92c8
MF
2139 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
2140 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
b41da4eb 2141 }
896d92c8 2142
3c385a93
MCA
2143 r->VsrD(0) = 0;
2144 r->VsrD(1) = carry_out;
b41da4eb
TM
2145}
2146
b132be53 2147void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
b41da4eb 2148{
b132be53 2149 r->s128 = int128_sub(a->s128, b->s128);
b41da4eb
TM
2150}
2151
e6a5ad43 2152void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
b41da4eb 2153{
e6a5ad43
MF
2154 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
2155 int128_make64(int128_getlo(c->s128) & 1));
b41da4eb
TM
2156}
2157
b7d30fae 2158void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
b41da4eb 2159{
b7d30fae
MF
2160 Int128 tmp = int128_not(b->s128);
2161
2162 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
2163 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
3c385a93 2164 r->VsrD(0) = 0;
b41da4eb
TM
2165}
2166
e6a5ad43 2167void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
b41da4eb 2168{
e6a5ad43
MF
2169 Int128 tmp = int128_not(b->s128);
2170 bool carry_out = int128_ult(int128_not(a->s128), tmp),
2171 carry_in = int128_getlo(c->s128) & 1;
b41da4eb 2172
e6a5ad43
MF
2173 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
2174 int128_makes64(-1)));
3c385a93 2175 r->VsrD(0) = 0;
b41da4eb
TM
2176}
2177
e8f7b27b
TM
/* Packed-decimal sign nibbles treated as positive ... */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_PLUS_ALT_2  0xE
/* ... and as negative */
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB

/* Sign characters of the national decimal format (ASCII '+' and '-') */
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/*
 * Index (for VsrB) of the byte that holds nibble n.  Two nibbles share a
 * byte, and nibble 0 lives in byte 15.
 */
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
e8f7b27b
TM
2188
2189static int bcd_get_sgn(ppc_avr_t *bcd)
2190{
428115c3 2191 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
e8f7b27b
TM
2192 case BCD_PLUS_PREF_1:
2193 case BCD_PLUS_PREF_2:
2194 case BCD_PLUS_ALT_1:
2195 case BCD_PLUS_ALT_2:
2196 {
2197 return 1;
2198 }
2199
2200 case BCD_NEG_PREF:
2201 case BCD_NEG_ALT:
2202 {
2203 return -1;
2204 }
2205
2206 default:
2207 {
2208 return 0;
2209 }
2210 }
2211}
2212
2213static int bcd_preferred_sgn(int sgn, int ps)
2214{
2215 if (sgn >= 0) {
2216 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2217 } else {
2218 return BCD_NEG_PREF;
2219 }
2220}
2221
2222static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2223{
2224 uint8_t result;
2225 if (n & 1) {
428115c3 2226 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
e8f7b27b 2227 } else {
428115c3 2228 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
e8f7b27b
TM
2229 }
2230
2231 if (unlikely(result > 9)) {
2232 *invalid = true;
2233 }
2234 return result;
2235}
2236
2237static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2238{
2239 if (n & 1) {
428115c3
MCA
2240 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2241 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
e8f7b27b 2242 } else {
428115c3
MCA
2243 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2244 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
e8f7b27b
TM
2245 }
2246}
2247
071663df
JRZ
2248static bool bcd_is_valid(ppc_avr_t *bcd)
2249{
2250 int i;
2251 int invalid = 0;
2252
2253 if (bcd_get_sgn(bcd) == 0) {
2254 return false;
2255 }
2256
2257 for (i = 1; i < 32; i++) {
2258 bcd_get_digit(bcd, i, &invalid);
2259 if (unlikely(invalid)) {
2260 return false;
2261 }
2262 }
2263 return true;
2264}
2265
b8155872
JRZ
2266static int bcd_cmp_zero(ppc_avr_t *bcd)
2267{
3c385a93 2268 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
efa73196 2269 return CRF_EQ;
b8155872 2270 } else {
efa73196 2271 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
b8155872
JRZ
2272 }
2273}
2274
2275static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2276{
60594fea 2277 return reg->VsrH(7 - n);
b8155872
JRZ
2278}
2279
e2106d73
JRZ
2280static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2281{
60594fea 2282 reg->VsrH(7 - n) = val;
e2106d73
JRZ
2283}
2284
e8f7b27b
TM
2285static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2286{
2287 int i;
2288 int invalid = 0;
2289 for (i = 31; i > 0; i--) {
2290 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2291 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2292 if (unlikely(invalid)) {
3b163b01 2293 return 0; /* doesn't matter */
e8f7b27b
TM
2294 } else if (dig_a > dig_b) {
2295 return 1;
2296 } else if (dig_a < dig_b) {
2297 return -1;
2298 }
2299 }
2300
2301 return 0;
2302}
2303
936fda4d 2304static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
e8f7b27b
TM
2305 int *overflow)
2306{
2307 int carry = 0;
2308 int i;
936fda4d
FR
2309 int is_zero = 1;
2310
e8f7b27b
TM
2311 for (i = 1; i <= 31; i++) {
2312 uint8_t digit = bcd_get_digit(a, i, invalid) +
2313 bcd_get_digit(b, i, invalid) + carry;
936fda4d 2314 is_zero &= (digit == 0);
e8f7b27b
TM
2315 if (digit > 9) {
2316 carry = 1;
2317 digit -= 10;
2318 } else {
2319 carry = 0;
2320 }
2321
2322 bcd_put_digit(t, digit, i);
e8f7b27b
TM
2323 }
2324
2325 *overflow = carry;
936fda4d 2326 return is_zero;
e8f7b27b
TM
2327}
2328
d03b174a 2329static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
e8f7b27b
TM
2330 int *overflow)
2331{
2332 int carry = 0;
2333 int i;
d03b174a 2334
e8f7b27b
TM
2335 for (i = 1; i <= 31; i++) {
2336 uint8_t digit = bcd_get_digit(a, i, invalid) -
2337 bcd_get_digit(b, i, invalid) + carry;
e8f7b27b
TM
2338 if (digit & 0x80) {
2339 carry = -1;
2340 digit += 10;
2341 } else {
2342 carry = 0;
2343 }
2344
2345 bcd_put_digit(t, digit, i);
e8f7b27b
TM
2346 }
2347
2348 *overflow = carry;
e8f7b27b
TM
2349}
2350
2351uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2352{
2353
2354 int sgna = bcd_get_sgn(a);
2355 int sgnb = bcd_get_sgn(b);
2356 int invalid = (sgna == 0) || (sgnb == 0);
2357 int overflow = 0;
936fda4d 2358 int zero = 0;
e8f7b27b
TM
2359 uint32_t cr = 0;
2360 ppc_avr_t result = { .u64 = { 0, 0 } };
2361
2362 if (!invalid) {
2363 if (sgna == sgnb) {
428115c3 2364 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
936fda4d
FR
2365 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2366 cr = (sgna > 0) ? CRF_GT : CRF_LT;
e8f7b27b 2367 } else {
d03b174a
YB
2368 int magnitude = bcd_cmp_mag(a, b);
2369 if (magnitude > 0) {
428115c3 2370 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
d03b174a
YB
2371 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2372 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2373 } else if (magnitude < 0) {
428115c3 2374 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
d03b174a
YB
2375 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2376 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2377 } else {
428115c3 2378 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
d03b174a
YB
2379 cr = CRF_EQ;
2380 }
e8f7b27b
TM
2381 }
2382 }
2383
2384 if (unlikely(invalid)) {
3c385a93 2385 result.VsrD(0) = result.VsrD(1) = -1;
efa73196 2386 cr = CRF_SO;
e8f7b27b 2387 } else if (overflow) {
efa73196 2388 cr |= CRF_SO;
936fda4d
FR
2389 } else if (zero) {
2390 cr |= CRF_EQ;
e8f7b27b
TM
2391 }
2392
2393 *r = result;
2394
2395 return cr;
2396}
2397
2398uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2399{
2400 ppc_avr_t bcopy = *b;
2401 int sgnb = bcd_get_sgn(b);
2402 if (sgnb < 0) {
2403 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2404 } else if (sgnb > 0) {
2405 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2406 }
2407 /* else invalid ... defer to bcdadd code for proper handling */
2408
2409 return helper_bcdadd(r, a, &bcopy, ps);
2410}
f293f04a 2411
b8155872
JRZ
2412uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2413{
2414 int i;
2415 int cr = 0;
2416 uint16_t national = 0;
2417 uint16_t sgnb = get_national_digit(b, 0);
2418 ppc_avr_t ret = { .u64 = { 0, 0 } };
2419 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2420
2421 for (i = 1; i < 8; i++) {
2422 national = get_national_digit(b, i);
2423 if (unlikely(national < 0x30 || national > 0x39)) {
2424 invalid = 1;
2425 break;
2426 }
2427
2428 bcd_put_digit(&ret, national & 0xf, i);
2429 }
2430
2431 if (sgnb == NATIONAL_PLUS) {
2432 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2433 } else {
2434 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2435 }
2436
2437 cr = bcd_cmp_zero(&ret);
2438
2439 if (unlikely(invalid)) {
efa73196 2440 cr = CRF_SO;
b8155872
JRZ
2441 }
2442
2443 *r = ret;
2444
2445 return cr;
2446}
2447
e2106d73
JRZ
2448uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2449{
2450 int i;
2451 int cr = 0;
2452 int sgnb = bcd_get_sgn(b);
2453 int invalid = (sgnb == 0);
2454 ppc_avr_t ret = { .u64 = { 0, 0 } };
2455
3c385a93 2456 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
e2106d73
JRZ
2457
2458 for (i = 1; i < 8; i++) {
2459 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2460
2461 if (unlikely(invalid)) {
2462 break;
2463 }
2464 }
2465 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2466
2467 cr = bcd_cmp_zero(b);
2468
2469 if (ox_flag) {
efa73196 2470 cr |= CRF_SO;
e2106d73
JRZ
2471 }
2472
2473 if (unlikely(invalid)) {
efa73196 2474 cr = CRF_SO;
e2106d73
JRZ
2475 }
2476
2477 *r = ret;
2478
2479 return cr;
2480}
2481
38f4cb04
JRZ
2482uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2483{
2484 int i;
2485 int cr = 0;
2486 int invalid = 0;
2487 int zone_digit = 0;
2488 int zone_lead = ps ? 0xF : 0x3;
2489 int digit = 0;
2490 ppc_avr_t ret = { .u64 = { 0, 0 } };
428115c3 2491 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
38f4cb04
JRZ
2492
2493 if (unlikely((sgnb < 0xA) && ps)) {
2494 invalid = 1;
2495 }
2496
2497 for (i = 0; i < 16; i++) {
428115c3
MCA
2498 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2499 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
38f4cb04
JRZ
2500 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2501 invalid = 1;
2502 break;
2503 }
2504
2505 bcd_put_digit(&ret, digit, i + 1);
2506 }
2507
2508 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2509 (!ps && (sgnb & 0x4))) {
2510 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2511 } else {
2512 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2513 }
2514
2515 cr = bcd_cmp_zero(&ret);
2516
2517 if (unlikely(invalid)) {
efa73196 2518 cr = CRF_SO;
38f4cb04
JRZ
2519 }
2520
2521 *r = ret;
2522
2523 return cr;
2524}
2525
0a890b31
JRZ
2526uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2527{
2528 int i;
2529 int cr = 0;
2530 uint8_t digit = 0;
2531 int sgnb = bcd_get_sgn(b);
2532 int zone_lead = (ps) ? 0xF0 : 0x30;
2533 int invalid = (sgnb == 0);
2534 ppc_avr_t ret = { .u64 = { 0, 0 } };
2535
3c385a93 2536 int ox_flag = ((b->VsrD(0) >> 4) != 0);
0a890b31
JRZ
2537
2538 for (i = 0; i < 16; i++) {
2539 digit = bcd_get_digit(b, i + 1, &invalid);
2540
2541 if (unlikely(invalid)) {
2542 break;
2543 }
2544
428115c3 2545 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
0a890b31
JRZ
2546 }
2547
2548 if (ps) {
2549 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2550 } else {
2551 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2552 }
2553
2554 cr = bcd_cmp_zero(b);
2555
2556 if (ox_flag) {
efa73196 2557 cr |= CRF_SO;
0a890b31
JRZ
2558 }
2559
2560 if (unlikely(invalid)) {
efa73196 2561 cr = CRF_SO;
0a890b31
JRZ
2562 }
2563
2564 *r = ret;
2565
2566 return cr;
2567}
2568
a3d67f3e
LP
/**
 * Three-way comparison of two unsigned 128-bit integers, each given as a
 * (low, high) pair of 64-bit halves.
 *
 * Returns:
 *   1 if ahi|alo >  bhi|blo,
 *   0 if ahi|alo == bhi|blo,
 *  -1 if ahi|alo <  bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    /* The high halves decide unless they are equal. */
    if (ahi != bhi) {
        return (ahi > bhi) ? 1 : -1;
    }
    if (alo != blo) {
        return (alo > blo) ? 1 : -1;
    }
    return 0;
}
2584
a406c058
JRZ
2585uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2586{
2587 int i;
a3d67f3e 2588 int cr;
a406c058
JRZ
2589 uint64_t lo_value;
2590 uint64_t hi_value;
40f3e79a 2591 uint64_t rem;
a406c058
JRZ
2592 ppc_avr_t ret = { .u64 = { 0, 0 } };
2593
3c385a93
MCA
2594 if (b->VsrSD(0) < 0) {
2595 lo_value = -b->VsrSD(1);
2596 hi_value = ~b->VsrD(0) + !lo_value;
a406c058 2597 bcd_put_digit(&ret, 0xD, 0);
a3d67f3e
LP
2598
2599 cr = CRF_LT;
a406c058 2600 } else {
3c385a93
MCA
2601 lo_value = b->VsrD(1);
2602 hi_value = b->VsrD(0);
a406c058 2603 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
a406c058 2604
a3d67f3e
LP
2605 if (hi_value == 0 && lo_value == 0) {
2606 cr = CRF_EQ;
2607 } else {
2608 cr = CRF_GT;
2609 }
a406c058
JRZ
2610 }
2611
a3d67f3e
LP
2612 /*
2613 * Check src limits: abs(src) <= 10^31 - 1
2614 *
2615 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2616 */
2617 if (ucmp128(lo_value, hi_value,
2618 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2619 cr |= CRF_SO;
a406c058 2620
a3d67f3e
LP
2621 /*
2622 * According to the ISA, if src wouldn't fit in the destination
2623 * register, the result is undefined.
2624 * In that case, we leave r unchanged.
2625 */
2626 } else {
40f3e79a 2627 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
a406c058 2628
40f3e79a
LP
2629 for (i = 1; i < 16; rem /= 10, i++) {
2630 bcd_put_digit(&ret, rem % 10, i);
a3d67f3e 2631 }
a406c058 2632
a3d67f3e
LP
2633 for (; i < 32; lo_value /= 10, i++) {
2634 bcd_put_digit(&ret, lo_value % 10, i);
2635 }
2636
2637 *r = ret;
2638 }
a406c058
JRZ
2639
2640 return cr;
2641}
2642
c85bc7dd
JRZ
2643uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2644{
2645 uint8_t i;
2646 int cr;
2647 uint64_t carry;
2648 uint64_t unused;
2649 uint64_t lo_value;
2650 uint64_t hi_value = 0;
2651 int sgnb = bcd_get_sgn(b);
2652 int invalid = (sgnb == 0);
2653
2654 lo_value = bcd_get_digit(b, 31, &invalid);
2655 for (i = 30; i > 0; i--) {
2656 mulu64(&lo_value, &carry, lo_value, 10ULL);
2657 mulu64(&hi_value, &unused, hi_value, 10ULL);
2658 lo_value += bcd_get_digit(b, i, &invalid);
2659 hi_value += carry;
2660
2661 if (unlikely(invalid)) {
2662 break;
2663 }
2664 }
2665
2666 if (sgnb == -1) {
3c385a93
MCA
2667 r->VsrSD(1) = -lo_value;
2668 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
c85bc7dd 2669 } else {
3c385a93
MCA
2670 r->VsrSD(1) = lo_value;
2671 r->VsrSD(0) = hi_value;
c85bc7dd
JRZ
2672 }
2673
2674 cr = bcd_cmp_zero(b);
2675
2676 if (unlikely(invalid)) {
2677 cr = CRF_SO;
2678 }
2679
2680 return cr;
2681}
2682
c3025c3b
JRZ
2683uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2684{
2685 int i;
2686 int invalid = 0;
2687
2688 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2689 return CRF_SO;
2690 }
2691
2692 *r = *a;
428115c3 2693 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
c3025c3b
JRZ
2694
2695 for (i = 1; i < 32; i++) {
2696 bcd_get_digit(a, i, &invalid);
2697 bcd_get_digit(b, i, &invalid);
2698 if (unlikely(invalid)) {
2699 return CRF_SO;
2700 }
2701 }
2702
2703 return bcd_cmp_zero(r);
2704}
2705
466a3f9c
JRZ
2706uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2707{
466a3f9c
JRZ
2708 int sgnb = bcd_get_sgn(b);
2709
2710 *r = *b;
2711 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2712
071663df
JRZ
2713 if (bcd_is_valid(b) == false) {
2714 return CRF_SO;
466a3f9c
JRZ
2715 }
2716
2717 return bcd_cmp_zero(r);
2718}
2719
e04797f7
JRZ
2720uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2721{
2722 int cr;
428115c3 2723 int i = a->VsrSB(7);
e04797f7
JRZ
2724 bool ox_flag = false;
2725 int sgnb = bcd_get_sgn(b);
2726 ppc_avr_t ret = *b;
3c385a93 2727 ret.VsrD(1) &= ~0xf;
e04797f7
JRZ
2728
2729 if (bcd_is_valid(b) == false) {
2730 return CRF_SO;
2731 }
2732
2733 if (unlikely(i > 31)) {
2734 i = 31;
2735 } else if (unlikely(i < -31)) {
2736 i = -31;
2737 }
2738
2739 if (i > 0) {
3c385a93 2740 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
e04797f7 2741 } else {
3c385a93 2742 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
e04797f7
JRZ
2743 }
2744 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2745
2746 *r = ret;
2747
2748 cr = bcd_cmp_zero(r);
2749 if (ox_flag) {
2750 cr |= CRF_SO;
2751 }
2752
2753 return cr;
2754}
2755
a49a95e9
JRZ
2756uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2757{
2758 int cr;
2759 int i;
2760 int invalid = 0;
2761 bool ox_flag = false;
2762 ppc_avr_t ret = *b;
2763
2764 for (i = 0; i < 32; i++) {
2765 bcd_get_digit(b, i, &invalid);
2766
2767 if (unlikely(invalid)) {
2768 return CRF_SO;
2769 }
2770 }
2771
428115c3 2772 i = a->VsrSB(7);
a49a95e9
JRZ
2773 if (i >= 32) {
2774 ox_flag = true;
3c385a93 2775 ret.VsrD(1) = ret.VsrD(0) = 0;
a49a95e9 2776 } else if (i <= -32) {
3c385a93 2777 ret.VsrD(1) = ret.VsrD(0) = 0;
a49a95e9 2778 } else if (i > 0) {
3c385a93 2779 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
a49a95e9 2780 } else {
3c385a93 2781 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
a49a95e9
JRZ
2782 }
2783 *r = ret;
2784
2785 cr = bcd_cmp_zero(r);
2786 if (ox_flag) {
2787 cr |= CRF_SO;
2788 }
2789
2790 return cr;
2791}
2792
a54238ad
JRZ
2793uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2794{
2795 int cr;
2796 int unused = 0;
2797 int invalid = 0;
2798 bool ox_flag = false;
2799 int sgnb = bcd_get_sgn(b);
2800 ppc_avr_t ret = *b;
3c385a93 2801 ret.VsrD(1) &= ~0xf;
a54238ad 2802
428115c3
MCA
2803 int i = a->VsrSB(7);
2804 ppc_avr_t bcd_one;
2805
2806 bcd_one.VsrD(0) = 0;
2807 bcd_one.VsrD(1) = 0x10;
a54238ad
JRZ
2808
2809 if (bcd_is_valid(b) == false) {
2810 return CRF_SO;
2811 }
2812
2813 if (unlikely(i > 31)) {
2814 i = 31;
2815 } else if (unlikely(i < -31)) {
2816 i = -31;
2817 }
2818
2819 if (i > 0) {
3c385a93 2820 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
a54238ad 2821 } else {
3c385a93 2822 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
a54238ad
JRZ
2823
2824 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2825 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2826 }
2827 }
2828 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2829
2830 cr = bcd_cmp_zero(&ret);
2831 if (ox_flag) {
2832 cr |= CRF_SO;
2833 }
2834 *r = ret;
2835
2836 return cr;
2837}
2838
31bc4d11
JRZ
2839uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2840{
2841 uint64_t mask;
2842 uint32_t ox_flag = 0;
428115c3 2843 int i = a->VsrSH(3) + 1;
31bc4d11
JRZ
2844 ppc_avr_t ret = *b;
2845
2846 if (bcd_is_valid(b) == false) {
2847 return CRF_SO;
2848 }
2849
2850 if (i > 16 && i < 32) {
2851 mask = (uint64_t)-1 >> (128 - i * 4);
3c385a93 2852 if (ret.VsrD(0) & ~mask) {
31bc4d11
JRZ
2853 ox_flag = CRF_SO;
2854 }
2855
3c385a93 2856 ret.VsrD(0) &= mask;
31bc4d11
JRZ
2857 } else if (i >= 0 && i <= 16) {
2858 mask = (uint64_t)-1 >> (64 - i * 4);
3c385a93 2859 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
31bc4d11
JRZ
2860 ox_flag = CRF_SO;
2861 }
2862
3c385a93
MCA
2863 ret.VsrD(1) &= mask;
2864 ret.VsrD(0) = 0;
31bc4d11
JRZ
2865 }
2866 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2867 *r = ret;
2868
2869 return bcd_cmp_zero(&ret) | ox_flag;
2870}
2871
5c32e2e4
JRZ
2872uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2873{
2874 int i;
2875 uint64_t mask;
2876 uint32_t ox_flag = 0;
2877 int invalid = 0;
2878 ppc_avr_t ret = *b;
2879
2880 for (i = 0; i < 32; i++) {
2881 bcd_get_digit(b, i, &invalid);
2882
2883 if (unlikely(invalid)) {
2884 return CRF_SO;
2885 }
2886 }
2887
428115c3 2888 i = a->VsrSH(3);
5c32e2e4
JRZ
2889 if (i > 16 && i < 33) {
2890 mask = (uint64_t)-1 >> (128 - i * 4);
3c385a93 2891 if (ret.VsrD(0) & ~mask) {
5c32e2e4
JRZ
2892 ox_flag = CRF_SO;
2893 }
2894
3c385a93 2895 ret.VsrD(0) &= mask;
5c32e2e4
JRZ
2896 } else if (i > 0 && i <= 16) {
2897 mask = (uint64_t)-1 >> (64 - i * 4);
3c385a93 2898 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
5c32e2e4
JRZ
2899 ox_flag = CRF_SO;
2900 }
2901
3c385a93
MCA
2902 ret.VsrD(1) &= mask;
2903 ret.VsrD(0) = 0;
5c32e2e4 2904 } else if (i == 0) {
3c385a93 2905 if (ret.VsrD(0) || ret.VsrD(1)) {
5c32e2e4
JRZ
2906 ox_flag = CRF_SO;
2907 }
3c385a93 2908 ret.VsrD(0) = ret.VsrD(1) = 0;
5c32e2e4
JRZ
2909 }
2910
2911 *r = ret;
3c385a93 2912 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
5c32e2e4
JRZ
2913 return ox_flag | CRF_EQ;
2914 }
2915
2916 return ox_flag | CRF_GT;
2917}
2918
c1542453 2919void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
557d52fa
TM
2920{
2921 int i;
2922 VECTOR_FOR_INORDER_I(i, u8) {
c1542453 2923 r->u8[i] = AES_sbox[a->u8[i]];
557d52fa
TM
2924 }
2925}
2926
c1542453 2927void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
557d52fa 2928{
ce9f5b37
RH
2929 AESState *ad = (AESState *)r;
2930 AESState *st = (AESState *)a;
2931 AESState *rk = (AESState *)b;
557d52fa 2932
ce9f5b37 2933 aesenc_SB_SR_MC_AK(ad, st, rk, true);
557d52fa
TM
2934}
2935
557d52fa
TM
2936void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2937{
7df34e48 2938 aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
557d52fa
TM
2939}
2940
2941void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2942{
af4cb945
RH
2943 AESState *ad = (AESState *)r;
2944 AESState *st = (AESState *)a;
2945 AESState *rk = (AESState *)b;
c1542453 2946
af4cb945 2947 aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
557d52fa
TM
2948}
2949
2950void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2951{
2cf44f3b 2952 aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
557d52fa
TM
2953}
2954
57354f8f
TM
2955void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2956{
2957 int st = (st_six & 0x10) != 0;
2958 int six = st_six & 0xF;
2959 int i;
2960
730d2ca3 2961 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
57354f8f
TM
2962 if (st == 0) {
2963 if ((six & (0x8 >> i)) == 0) {
0ef83bf2
MCA
2964 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2965 ror32(a->VsrW(i), 18) ^
730d2ca3 2966 (a->VsrW(i) >> 3);
57354f8f 2967 } else { /* six.bit[i] == 1 */
0ef83bf2
MCA
2968 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2969 ror32(a->VsrW(i), 19) ^
730d2ca3 2970 (a->VsrW(i) >> 10);
57354f8f
TM
2971 }
2972 } else { /* st == 1 */
2973 if ((six & (0x8 >> i)) == 0) {
0ef83bf2
MCA
2974 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2975 ror32(a->VsrW(i), 13) ^
2976 ror32(a->VsrW(i), 22);
57354f8f 2977 } else { /* six.bit[i] == 1 */
0ef83bf2
MCA
2978 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2979 ror32(a->VsrW(i), 11) ^
2980 ror32(a->VsrW(i), 25);
57354f8f
TM
2981 }
2982 }
2983 }
2984}
2985
57354f8f
TM
2986void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2987{
2988 int st = (st_six & 0x10) != 0;
2989 int six = st_six & 0xF;
2990 int i;
2991
730d2ca3 2992 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
57354f8f 2993 if (st == 0) {
b6cb41b2 2994 if ((six & (0x8 >> (2 * i))) == 0) {
0ef83bf2
MCA
2995 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2996 ror64(a->VsrD(i), 8) ^
730d2ca3 2997 (a->VsrD(i) >> 7);
57354f8f 2998 } else { /* six.bit[2*i] == 1 */
0ef83bf2
MCA
2999 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3000 ror64(a->VsrD(i), 61) ^
730d2ca3 3001 (a->VsrD(i) >> 6);
57354f8f
TM
3002 }
3003 } else { /* st == 1 */
b6cb41b2 3004 if ((six & (0x8 >> (2 * i))) == 0) {
0ef83bf2
MCA
3005 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3006 ror64(a->VsrD(i), 34) ^
3007 ror64(a->VsrD(i), 39);
57354f8f 3008 } else { /* six.bit[2*i] == 1 */
0ef83bf2
MCA
3009 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3010 ror64(a->VsrD(i), 18) ^
3011 ror64(a->VsrD(i), 41);
57354f8f
TM
3012 }
3013 }
3014 }
3015}
3016
ac174549
TM
3017void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3018{
65cf1f65 3019 ppc_avr_t result;
ac174549 3020 int i;
65cf1f65 3021
60594fea
MCA
3022 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3023 int indexA = c->VsrB(i) >> 4;
3024 int indexB = c->VsrB(i) & 0xF;
3025
3026 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
ac174549 3027 }
65cf1f65 3028 *r = result;
ac174549
TM
3029}
3030
64654ded 3031#undef VECTOR_FOR_INORDER_I
64654ded
BS
3032
3033/*****************************************************************************/
3034/* SPE extension helpers */
3035/* Use a table to make this quicker */
/* Bit-reversed value of every 4-bit nibble. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of one byte: swap the nibbles, reversing each. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: bytes are swapped end for end and
 * each one is bit-reversed.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    /*
     * Widen to uint32_t before shifting: byte_reverse() promotes to int,
     * and shifting a value >= 0x80 left by 24 would overflow a signed int.
     */
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
3051
3052#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3053target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3054{
3055 uint32_t a, b, d, mask;
3056
3057 mask = UINT32_MAX >> (32 - MASKBITS);
3058 a = arg1 & mask;
3059 b = arg2 & mask;
3060 d = word_reverse(1 + word_reverse(a | ~b));
3061 return (arg1 & ~mask) | (d & b);
3062}
3063
/*
 * Count leading sign bits: clz32() of val, or of ~val when the top bit of
 * val is set.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    uint32_t folded = (val & 0x80000000) ? ~val : val;

    return clz32(folded);
}
3072
/* Count leading zero bits of val; thin wrapper around clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
3077
3078/* 440 specific */
d15f74fb
BS
3079target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3080 target_ulong low, uint32_t update_Rc)
64654ded
BS
3081{
3082 target_ulong mask;
3083 int i;
3084
3085 i = 1;
3086 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3087 if ((high & mask) == 0) {
3088 if (update_Rc) {
3089 env->crf[0] = 0x4;
3090 }
3091 goto done;
3092 }
3093 i++;
3094 }
3095 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3096 if ((low & mask) == 0) {
3097 if (update_Rc) {
3098 env->crf[0] = 0x8;
3099 }
3100 goto done;
3101 }
3102 i++;
3103 }
ebbd8b40 3104 i = 8;
64654ded
BS
3105 if (update_Rc) {
3106 env->crf[0] = 0x2;
3107 }
3108 done:
3109 env->xer = (env->xer & ~0x7F) | i;
3110 if (update_Rc) {
3111 env->crf[0] |= xer_so;
3112 }
3113 return i;
3114}