target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
32
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
36
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 {
39 if (unlikely(ov)) {
40 env->so = env->ov = env->ov32 = 1;
41 } else {
42 env->ov = env->ov32 = 0;
43 }
44 }
45
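/*
 * divweu (Divide Word Extended Unsigned): the 32-bit value in RA is
 * extended with 32 zero bits on the right before the division, so a
 * 32-bit quotient only exists when (uint32_t)RA < (uint32_t)RB and RB != 0.
 * Illustrative example (not from the source): RA = 1, RB = 8 divides
 * 0x100000000 by 8, giving RT = 0x20000000 with no overflow.
 */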
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48 {
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
68 }
69
70 return (target_ulong)rt;
71 }
72
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75 {
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
96 }
97
98 return (target_ulong)rt;
99 }
100
101 #if defined(TARGET_PPC64)
102
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 {
105 uint64_t rt = 0;
106 int overflow = 0;
107
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
113 }
114
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
117 }
118
119 return rt;
120 }
121
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 {
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
128
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
134 }
135
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
138 }
139
140 return rt;
141 }
142
143 #endif
144
145
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
150 /*
151 * Subtract 1 from each byte, AND the result with the inverse of the
152 * original value, and check whether the MSB of each byte is set; a set
153 * MSB flags a zero byte, e.g.:
154 * ((0x00 - 0x01) & ~(0x00)) & 0x80 = (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 */
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160
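/*
 * cmpeqb sets CRF_GT in the target CR field when any byte of 'rb' equals
 * the low byte of 'ra'. Illustrative example (not from the source):
 * hasvalue(0x1122334455667788, 0x55) is non-zero because the byte 0x55 is
 * present, so helper_cmpeqb returns CRF_GT.
 */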
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 {
163 return hasvalue(rb, ra) ? CRF_GT : 0;
164 }
165
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
169
170 /*
171 * Return a random number.
172 */
173 uint64_t helper_darn32(void)
174 {
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
184
185 return ret;
186 }
187
188 uint64_t helper_darn64(void)
189 {
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
199
200 return ret;
201 }
202
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 {
205 int i;
206 uint64_t ra = 0;
207
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
213 }
214 }
215 }
216 return ra;
217 }
218
219 #endif
220
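/*
 * cmpb: per-byte compare; each result byte is 0xFF where the corresponding
 * bytes of 'rs' and 'rb' are equal, 0x00 otherwise. Illustrative example
 * (not from the source): rs = 0x1122334455667788, rb = 0x11FF334455667700
 * yields ra = 0xFF00FFFFFFFFFF00 on a 64-bit target.
 */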
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 {
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234 }
235
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
239 {
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
250 }
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
254 }
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
258 }
259 return (target_long)ret;
260 }
261
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
265 {
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
276 }
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
280 }
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
284 }
285 return ret;
286 }
287 #endif
288
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
291 {
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300 }
301
302 target_ulong helper_popcntw(target_ulong val)
303 {
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316 }
317 #else
318 target_ulong helper_popcntb(target_ulong val)
319 {
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325 }
326 #endif
327
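/*
 * CFUGED (Centrifuge Doubleword): bits of 'src' at positions where 'mask'
 * is 0 are packed, in their original order, into the most significant end
 * of the result; bits where 'mask' is 1 are packed into the least
 * significant end. Conceptually, on 8 bits (illustration only):
 * src = abcdefgh, mask = 10101010 -> result = bdfh aceg.
 */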
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
329 {
330     /*
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we handle it in
333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
334 * between ctz and cto, we negate the mask at the end of each iteration.
335 */
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
339
340 if (mask == 0 || mask == -1) {
341 return src;
342 }
343
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
350 }
351
352 /*
353 * Extract the 'n' trailing bits of 'src' and put them in the leading 'n'
354 * bits of 'right' or 'left', pushing down the previously extracted
355 * values.
356 */
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
362 }
363
364 /*
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
369 */
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
375 }
376
377 /*
378 * At the end, 'right' was ror'ed ctpop(mask) times. To put it back in
379 * place, we shift it right a further 64 - ctpop(mask) bits.
380 */
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
385 }
386
387 return left | (right >> n);
388 }
389
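/*
 * PDEPD (Parallel Bits Deposit Doubleword): successive low-order bits of
 * 'src' are deposited at the positions of the set bits of 'mask', lowest
 * set bit first. Illustrative example (not from the source):
 * src = 0b101, mask = 0x111 -> result = 0x101.
 */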
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 {
392 int i, o;
393 uint64_t result = 0;
394
395 if (mask == -1) {
396 return src;
397 }
398
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
403 }
404
405 return result;
406 }
407
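/*
 * PEXTD (Parallel Bits Extract Doubleword): the bits of 'src' at the set
 * positions of 'mask' are packed into the low-order bits of the result,
 * lowest set bit first. Illustrative example (not from the source):
 * src = 0x101, mask = 0x111 -> result = 0b101.
 */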
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 {
410 int i, o;
411 uint64_t result = 0;
412
413 if (mask == -1) {
414 return src;
415 }
416
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
421 }
422
423 return result;
424 }
425
426 /*****************************************************************************/
427 /* Altivec extension helpers */
428 #if HOST_BIG_ENDIAN
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
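/*
 * VECTOR_FOR_INORDER_I visits vector elements in PowerISA (architectural)
 * order, from the most significant element to the least, regardless of host
 * byte order; 'index' holds the host-order array subscript of each element.
 */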
435
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
439 { \
440 to_type r; \
441 \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
450 } \
451 return r; \
452 }
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
455 { \
456 to_type r; \
457 \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
463 } \
464 return r; \
465 }
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
478
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 {
481 ppc_store_vscr(env, vscr);
482 }
483
484 uint32_t helper_mfvscr(CPUPPCState *env)
485 {
486 return ppc_get_vscr(env);
487 }
488
489 static inline void set_vscr_sat(CPUPPCState *env)
490 {
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
493 }
494
495 /* vprtybq: parity (XOR) of the least significant bit of every byte of b */
496 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
497 {
498 uint64_t res = b->u64[0] ^ b->u64[1];
499 res ^= res >> 32;
500 res ^= res >> 16;
501 res ^= res >> 8;
502 r->VsrD(1) = res & 1;
503 r->VsrD(0) = 0;
504 }
505
506 #define VARITHFP(suffix, func) \
507 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
508 ppc_avr_t *b) \
509 { \
510 int i; \
511 \
512 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
513 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
514 } \
515 }
516 VARITHFP(addfp, float32_add)
517 VARITHFP(subfp, float32_sub)
518 VARITHFP(minfp, float32_min)
519 VARITHFP(maxfp, float32_max)
520 #undef VARITHFP
521
522 #define VARITHFPFMA(suffix, type) \
523 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
524 ppc_avr_t *b, ppc_avr_t *c) \
525 { \
526 int i; \
527 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
528 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
529 type, &env->vec_status); \
530 } \
531 }
532 VARITHFPFMA(maddfp, 0);
533 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
534 #undef VARITHFPFMA
535
536 #define VARITHSAT_CASE(type, op, cvt, element) \
537 { \
538 type result = (type)a->element[i] op (type)b->element[i]; \
539 r->element[i] = cvt(result, &sat); \
540 }
541
542 #define VARITHSAT_DO(name, op, optype, cvt, element) \
543 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
544 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
545 { \
546 int sat = 0; \
547 int i; \
548 \
549 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
550 VARITHSAT_CASE(optype, op, cvt, element); \
551 } \
552 if (sat) { \
553 vscr_sat->u32[0] = 1; \
554 } \
555 }
556 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
557 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
558 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
559 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
560 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
561 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
562 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
563 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
564 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
565 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
566 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
567 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
568 #undef VARITHSAT_CASE
569 #undef VARITHSAT_DO
570 #undef VARITHSAT_SIGNED
571 #undef VARITHSAT_UNSIGNED
572
573 #define VAVG(name, element, etype) \
574 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
575 { \
576 int i; \
577 \
578 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
579 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
580 r->element[i] = x >> 1; \
581 } \
582 }
583
584 VAVG(VAVGSB, s8, int16_t)
585 VAVG(VAVGUB, u8, uint16_t)
586 VAVG(VAVGSH, s16, int32_t)
587 VAVG(VAVGUH, u16, uint32_t)
588 VAVG(VAVGSW, s32, int64_t)
589 VAVG(VAVGUW, u32, uint64_t)
590 #undef VAVG
591
592 #define VABSDU(name, element) \
593 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
594 { \
595 int i; \
596 \
597 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
598 r->element[i] = (a->element[i] > b->element[i]) ? \
599 (a->element[i] - b->element[i]) : \
600 (b->element[i] - a->element[i]); \
601 } \
602 }
603
604 /*
605 * VABSDU - Vector absolute difference unsigned
606 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
607 * element - element type to access from vector
608 */
609 VABSDU(VABSDUB, u8)
610 VABSDU(VABSDUH, u16)
611 VABSDU(VABSDUW, u32)
612 #undef VABSDU
613
614 #define VCF(suffix, cvt, element) \
615 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
616 ppc_avr_t *b, uint32_t uim) \
617 { \
618 int i; \
619 \
620 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
621 float32 t = cvt(b->element[i], &env->vec_status); \
622 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
623 } \
624 }
625 VCF(ux, uint32_to_float32, u32)
626 VCF(sx, int32_to_float32, s32)
627 #undef VCF
628
629 #define VCMPNEZ(NAME, ELEM) \
630 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
631 { \
632 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
633 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
634 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
635 } \
636 }
637 VCMPNEZ(VCMPNEZB, u8)
638 VCMPNEZ(VCMPNEZH, u16)
639 VCMPNEZ(VCMPNEZW, u32)
640 #undef VCMPNEZ
641
642 #define VCMPFP_DO(suffix, compare, order, record) \
643 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
644 ppc_avr_t *a, ppc_avr_t *b) \
645 { \
646 uint32_t ones = (uint32_t)-1; \
647 uint32_t all = ones; \
648 uint32_t none = 0; \
649 int i; \
650 \
651 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
652 uint32_t result; \
653 FloatRelation rel = \
654 float32_compare_quiet(a->f32[i], b->f32[i], \
655 &env->vec_status); \
656 if (rel == float_relation_unordered) { \
657 result = 0; \
658 } else if (rel compare order) { \
659 result = ones; \
660 } else { \
661 result = 0; \
662 } \
663 r->u32[i] = result; \
664 all &= result; \
665 none |= result; \
666 } \
667 if (record) { \
668 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
669 } \
670 }
671 #define VCMPFP(suffix, compare, order) \
672 VCMPFP_DO(suffix, compare, order, 0) \
673 VCMPFP_DO(suffix##_dot, compare, order, 1)
674 VCMPFP(eqfp, ==, float_relation_equal)
675 VCMPFP(gefp, !=, float_relation_less)
676 VCMPFP(gtfp, ==, float_relation_greater)
677 #undef VCMPFP_DO
678 #undef VCMPFP
679
680 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
681 ppc_avr_t *a, ppc_avr_t *b, int record)
682 {
683 int i;
684 int all_in = 0;
685
686 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
687 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
688 &env->vec_status);
689 if (le_rel == float_relation_unordered) {
690 r->u32[i] = 0xc0000000;
691 all_in = 1;
692 } else {
693 float32 bneg = float32_chs(b->f32[i]);
694 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
695 &env->vec_status);
696 int le = le_rel != float_relation_greater;
697 int ge = ge_rel != float_relation_less;
698
699 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
700 all_in |= (!le | !ge);
701 }
702 }
703 if (record) {
704 env->crf[6] = (all_in == 0) << 1;
705 }
706 }
707
708 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
709 {
710 vcmpbfp_internal(env, r, a, b, 0);
711 }
712
713 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
714 ppc_avr_t *b)
715 {
716 vcmpbfp_internal(env, r, a, b, 1);
717 }
718
719 #define VCT(suffix, satcvt, element) \
720 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
721 ppc_avr_t *b, uint32_t uim) \
722 { \
723 int i; \
724 int sat = 0; \
725 float_status s = env->vec_status; \
726 \
727 set_float_rounding_mode(float_round_to_zero, &s); \
728 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
729 if (float32_is_any_nan(b->f32[i])) { \
730 r->element[i] = 0; \
731 } else { \
732 float64 t = float32_to_float64(b->f32[i], &s); \
733 int64_t j; \
734 \
735 t = float64_scalbn(t, uim, &s); \
736 j = float64_to_int64(t, &s); \
737 r->element[i] = satcvt(j, &sat); \
738 } \
739 } \
740 if (sat) { \
741 set_vscr_sat(env); \
742 } \
743 }
744 VCT(uxs, cvtsduw, u32)
745 VCT(sxs, cvtsdsw, s32)
746 #undef VCT
747
748 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
749
750 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
751 {
752 int64_t psum = 0;
753 for (int i = 0; i < 8; i++, mask >>= 1) {
754 if (mask & 1) {
755 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
756 }
757 }
758 return psum;
759 }
760
761 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
762 {
763 int64_t psum = 0;
764 for (int i = 0; i < 4; i++, mask >>= 1) {
765 if (mask & 1) {
766 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
767 }
768 }
769 return psum;
770 }
771
772 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
773 {
774 int64_t psum = 0;
775 for (int i = 0; i < 2; i++, mask >>= 1) {
776 if (mask & 1) {
777 psum += (int64_t)sextract32(a, 16 * i, 16) *
778 sextract32(b, 16 * i, 16);
779 }
780 }
781 return psum;
782 }
783
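/*
 * xviger computes one 4x4 outer-product accumulator update: for every
 * (i, j) pair enabled by XMSK/YMSK, the rank-k dot product of word i of
 * 'a' and word j of 'b' (masked by PMSK) is optionally accumulated and/or
 * saturated into at[i].VsrSW(j); disabled entries are zeroed.
 */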
784 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
785 uint32_t mask, bool sat, bool acc, do_ger ger)
786 {
787 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
788 xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
789 ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
790 uint8_t xmsk_bit, ymsk_bit;
791 int64_t psum;
792 int i, j;
793 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
794 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
795 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
796 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
797 if (acc) {
798 psum += at[i].VsrSW(j);
799 }
800 if (sat && psum > INT32_MAX) {
801 set_vscr_sat(env);
802 at[i].VsrSW(j) = INT32_MAX;
803 } else if (sat && psum < INT32_MIN) {
804 set_vscr_sat(env);
805 at[i].VsrSW(j) = INT32_MIN;
806 } else {
807 at[i].VsrSW(j) = (int32_t) psum;
808 }
809 } else {
810 at[i].VsrSW(j) = 0;
811 }
812 }
813 }
814 }
815
816 QEMU_FLATTEN
817 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
818 ppc_acc_t *at, uint32_t mask)
819 {
820 xviger(env, a, b, at, mask, false, false, ger_rank8);
821 }
822
823 QEMU_FLATTEN
824 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
825 ppc_acc_t *at, uint32_t mask)
826 {
827 xviger(env, a, b, at, mask, false, true, ger_rank8);
828 }
829
830 QEMU_FLATTEN
831 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
832 ppc_acc_t *at, uint32_t mask)
833 {
834 xviger(env, a, b, at, mask, false, false, ger_rank4);
835 }
836
837 QEMU_FLATTEN
838 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
839 ppc_acc_t *at, uint32_t mask)
840 {
841 xviger(env, a, b, at, mask, false, true, ger_rank4);
842 }
843
844 QEMU_FLATTEN
845 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
846 ppc_acc_t *at, uint32_t mask)
847 {
848 xviger(env, a, b, at, mask, true, true, ger_rank4);
849 }
850
851 QEMU_FLATTEN
852 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
853 ppc_acc_t *at, uint32_t mask)
854 {
855 xviger(env, a, b, at, mask, false, false, ger_rank2);
856 }
857
858 QEMU_FLATTEN
859 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
860 ppc_acc_t *at, uint32_t mask)
861 {
862 xviger(env, a, b, at, mask, true, false, ger_rank2);
863 }
864
865 QEMU_FLATTEN
866 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
867 ppc_acc_t *at, uint32_t mask)
868 {
869 xviger(env, a, b, at, mask, false, true, ger_rank2);
870 }
871
872 QEMU_FLATTEN
873 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
874 ppc_acc_t *at, uint32_t mask)
875 {
876 xviger(env, a, b, at, mask, true, true, ger_rank2);
877 }
878
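/*
 * vclzlsbb/vctzlsbb count, from the left (resp. right) end of the vector in
 * element order, the number of bytes whose least significant bit is zero
 * before the first byte with its low bit set.
 */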
879 target_ulong helper_vclzlsbb(ppc_avr_t *r)
880 {
881 target_ulong count = 0;
882 int i;
883 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
884 if (r->VsrB(i) & 0x01) {
885 break;
886 }
887 count++;
888 }
889 return count;
890 }
891
892 target_ulong helper_vctzlsbb(ppc_avr_t *r)
893 {
894 target_ulong count = 0;
895 int i;
896 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
897 if (r->VsrB(i) & 0x01) {
898 break;
899 }
900 count++;
901 }
902 return count;
903 }
904
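/*
 * VMHADDSHS (Vector Multiply-High-Add Signed Halfword Saturate): for each
 * halfword, take the high 17 bits of the 32-bit product (prod >> 15), add
 * the corresponding halfword of 'c', and saturate to int16_t.
 */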
905 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
906 ppc_avr_t *b, ppc_avr_t *c)
907 {
908 int sat = 0;
909 int i;
910
911 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
912 int32_t prod = a->s16[i] * b->s16[i];
913 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
914
915 r->s16[i] = cvtswsh(t, &sat);
916 }
917
918 if (sat) {
919 set_vscr_sat(env);
920 }
921 }
922
923 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
924 ppc_avr_t *b, ppc_avr_t *c)
925 {
926 int sat = 0;
927 int i;
928
929 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
930 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
931 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
932 r->s16[i] = cvtswsh(t, &sat);
933 }
934
935 if (sat) {
936 set_vscr_sat(env);
937 }
938 }
939
940 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
941 uint32_t v)
942 {
943 int i;
944
945 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
946 int32_t prod = a->s16[i] * b->s16[i];
947 r->s16[i] = (int16_t) (prod + c->s16[i]);
948 }
949 }
950
951 #define VMRG_DO(name, element, access, ofs) \
952 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
953 { \
954 ppc_avr_t result; \
955 int i, half = ARRAY_SIZE(r->element) / 2; \
956 \
957 for (i = 0; i < half; i++) { \
958 result.access(i * 2 + 0) = a->access(i + ofs); \
959 result.access(i * 2 + 1) = b->access(i + ofs); \
960 } \
961 *r = result; \
962 }
963
964 #define VMRG(suffix, element, access) \
965 VMRG_DO(mrgl##suffix, element, access, half) \
966 VMRG_DO(mrgh##suffix, element, access, 0)
967 VMRG(b, u8, VsrB)
968 VMRG(h, u16, VsrH)
969 VMRG(w, u32, VsrW)
970 #undef VMRG_DO
971 #undef VMRG
972
973 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
974 {
975 int32_t prod[16];
976 int i;
977
978 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
979 prod[i] = (int32_t)a->s8[i] * b->u8[i];
980 }
981
982 VECTOR_FOR_INORDER_I(i, s32) {
983 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
984 prod[4 * i + 2] + prod[4 * i + 3];
985 }
986 }
987
988 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
989 {
990 int32_t prod[8];
991 int i;
992
993 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
994 prod[i] = a->s16[i] * b->s16[i];
995 }
996
997 VECTOR_FOR_INORDER_I(i, s32) {
998 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
999 }
1000 }
1001
1002 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1003 ppc_avr_t *b, ppc_avr_t *c)
1004 {
1005 int32_t prod[8];
1006 int i;
1007 int sat = 0;
1008
1009 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1010 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1011 }
1012
1013 VECTOR_FOR_INORDER_I(i, s32) {
1014 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1015
1016 r->u32[i] = cvtsdsw(t, &sat);
1017 }
1018
1019 if (sat) {
1020 set_vscr_sat(env);
1021 }
1022 }
1023
1024 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1025 {
1026 uint16_t prod[16];
1027 int i;
1028
1029 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1030 prod[i] = a->u8[i] * b->u8[i];
1031 }
1032
1033 VECTOR_FOR_INORDER_I(i, u32) {
1034 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1035 prod[4 * i + 2] + prod[4 * i + 3];
1036 }
1037 }
1038
1039 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1040 {
1041 uint32_t prod[8];
1042 int i;
1043
1044 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1045 prod[i] = a->u16[i] * b->u16[i];
1046 }
1047
1048 VECTOR_FOR_INORDER_I(i, u32) {
1049 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1050 }
1051 }
1052
1053 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1054 ppc_avr_t *b, ppc_avr_t *c)
1055 {
1056 uint32_t prod[8];
1057 int i;
1058 int sat = 0;
1059
1060 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1061 prod[i] = a->u16[i] * b->u16[i];
1062 }
1063
1064 VECTOR_FOR_INORDER_I(i, s32) {
1065 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1066
1067 r->u32[i] = cvtuduw(t, &sat);
1068 }
1069
1070 if (sat) {
1071 set_vscr_sat(env);
1072 }
1073 }
1074
1075 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1076 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1077 { \
1078 int i; \
1079 \
1080 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1081 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1082 (cast)b->mul_access(i); \
1083 } \
1084 }
1085
1086 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1087 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1088 { \
1089 int i; \
1090 \
1091 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1092 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1093 (cast)b->mul_access(i + 1); \
1094 } \
1095 }
1096
1097 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1098 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1099 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1100 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1101 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1102 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1103 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1104 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1105 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1106 #undef VMUL_DO_EVN
1107 #undef VMUL_DO_ODD
1108 #undef VMUL
1109
1110 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1111 target_ulong uim)
1112 {
1113 int i, idx;
1114 ppc_vsr_t tmp = { .u64 = {0, 0} };
1115
1116 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1117 if ((pcv->VsrB(i) >> 5) == uim) {
1118 idx = pcv->VsrB(i) & 0x1f;
1119 if (idx < ARRAY_SIZE(t->u8)) {
1120 tmp.VsrB(i) = s0->VsrB(idx);
1121 } else {
1122 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1123 }
1124 }
1125 }
1126
1127 *t = tmp;
1128 }
1129
1130 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1131 {
1132 Int128 neg1 = int128_makes64(-1);
1133 Int128 int128_min = int128_make128(0, INT64_MIN);
1134 if (likely(int128_nz(b->s128) &&
1135 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1136 t->s128 = int128_divs(a->s128, b->s128);
1137 } else {
1138 t->s128 = a->s128; /* Undefined behavior */
1139 }
1140 }
1141
1142 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1143 {
1144 if (int128_nz(b->s128)) {
1145 t->s128 = int128_divu(a->s128, b->s128);
1146 } else {
1147 t->s128 = a->s128; /* Undefined behavior */
1148 }
1149 }
1150
1151 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1152 {
1153 int i;
1154 int64_t high;
1155 uint64_t low;
1156 for (i = 0; i < 2; i++) {
1157 high = a->s64[i];
1158 low = 0;
1159 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1160 t->s64[i] = a->s64[i]; /* Undefined behavior */
1161 } else {
1162 divs128(&low, &high, b->s64[i]);
1163 t->s64[i] = low;
1164 }
1165 }
1166 }
1167
1168 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1169 {
1170 int i;
1171 uint64_t high, low;
1172 for (i = 0; i < 2; i++) {
1173 high = a->u64[i];
1174 low = 0;
1175 if (unlikely(!b->u64[i])) {
1176 t->u64[i] = a->u64[i]; /* Undefined behavior */
1177 } else {
1178 divu128(&low, &high, b->u64[i]);
1179 t->u64[i] = low;
1180 }
1181 }
1182 }
1183
1184 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1185 {
1186 Int128 high, low;
1187 Int128 int128_min = int128_make128(0, INT64_MIN);
1188 Int128 neg1 = int128_makes64(-1);
1189
1190 high = a->s128;
1191 low = int128_zero();
1192 if (unlikely(!int128_nz(b->s128) ||
1193 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1194 t->s128 = a->s128; /* Undefined behavior */
1195 } else {
1196 divs256(&low, &high, b->s128);
1197 t->s128 = low;
1198 }
1199 }
1200
1201 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1202 {
1203 Int128 high, low;
1204
1205 high = a->s128;
1206 low = int128_zero();
1207 if (unlikely(!int128_nz(b->s128))) {
1208 t->s128 = a->s128; /* Undefined behavior */
1209 } else {
1210 divu256(&low, &high, b->s128);
1211 t->s128 = low;
1212 }
1213 }
1214
1215 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1216 {
1217 Int128 neg1 = int128_makes64(-1);
1218 Int128 int128_min = int128_make128(0, INT64_MIN);
1219 if (likely(int128_nz(b->s128) &&
1220 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1221 t->s128 = int128_rems(a->s128, b->s128);
1222 } else {
1223 t->s128 = int128_zero(); /* Undefined behavior */
1224 }
1225 }
1226
1227 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1228 {
1229 if (likely(int128_nz(b->s128))) {
1230 t->s128 = int128_remu(a->s128, b->s128);
1231 } else {
1232 t->s128 = int128_zero(); /* Undefined behavior */
1233 }
1234 }
1235
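/*
 * VPERM selects each result byte from the 32-byte concatenation of 'a'
 * (bytes 0-15) and 'b' (bytes 16-31), using the low 5 bits of the matching
 * byte of 'c' as the index. Illustrative example (not from the source): if
 * c->VsrB(0) == 0x11, then result byte 0 is b->VsrB(1).
 */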
1236 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1237 {
1238 ppc_avr_t result;
1239 int i;
1240
1241 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1242 int s = c->VsrB(i) & 0x1f;
1243 int index = s & 0xf;
1244
1245 if (s & 0x10) {
1246 result.VsrB(i) = b->VsrB(index);
1247 } else {
1248 result.VsrB(i) = a->VsrB(index);
1249 }
1250 }
1251 *r = result;
1252 }
1253
1254 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1255 {
1256 ppc_avr_t result;
1257 int i;
1258
1259 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1260 int s = c->VsrB(i) & 0x1f;
1261 int index = 15 - (s & 0xf);
1262
1263 if (s & 0x10) {
1264 result.VsrB(i) = a->VsrB(index);
1265 } else {
1266 result.VsrB(i) = b->VsrB(index);
1267 }
1268 }
1269 *r = result;
1270 }
1271
1272 #define XXGENPCV_BE_EXP(NAME, SZ) \
1273 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1274 { \
1275 ppc_vsr_t tmp; \
1276 \
1277 /* Initialize tmp with the result of an all-zeros mask */ \
1278 tmp.VsrD(0) = 0x1011121314151617; \
1279 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1280 \
1281 /* Iterate over the most significant byte of each element */ \
1282 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1283 if (b->VsrB(i) & 0x80) { \
1284 /* Update each byte of the element */ \
1285 for (int k = 0; k < SZ; k++) { \
1286 tmp.VsrB(i + k) = j + k; \
1287 } \
1288 j += SZ; \
1289 } \
1290 } \
1291 \
1292 *t = tmp; \
1293 }
1294
1295 #define XXGENPCV_BE_COMP(NAME, SZ) \
1296 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1297 { \
1298 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1299 \
1300 /* Iterate over the most significant byte of each element */ \
1301 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1302 if (b->VsrB(i) & 0x80) { \
1303 /* Update each byte of the element */ \
1304 for (int k = 0; k < SZ; k++) { \
1305 tmp.VsrB(j + k) = i + k; \
1306 } \
1307 j += SZ; \
1308 } \
1309 } \
1310 \
1311 *t = tmp; \
1312 }
1313
1314 #define XXGENPCV_LE_EXP(NAME, SZ) \
1315 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1316 { \
1317 ppc_vsr_t tmp; \
1318 \
1319 /* Initialize tmp with the result of an all-zeros mask */ \
1320 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1321 tmp.VsrD(1) = 0x1716151413121110; \
1322 \
1323 /* Iterate over the most significant byte of each element */ \
1324 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1325 /* Reverse indexing of "i" */ \
1326 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1327 if (b->VsrB(idx) & 0x80) { \
1328 /* Update each byte of the element */ \
1329 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1330 tmp.VsrB(idx + rk) = j + k; \
1331 } \
1332 j += SZ; \
1333 } \
1334 } \
1335 \
1336 *t = tmp; \
1337 }
1338
1339 #define XXGENPCV_LE_COMP(NAME, SZ) \
1340 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1341 { \
1342 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1343 \
1344 /* Iterate over the most significant byte of each element */ \
1345 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1346 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1347 /* Update each byte of the element */ \
1348 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1349 /* Reverse indexing of "j" */ \
1350 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1351 tmp.VsrB(idx + rk) = i + k; \
1352 } \
1353 j += SZ; \
1354 } \
1355 } \
1356 \
1357 *t = tmp; \
1358 }
1359
1360 #define XXGENPCV(NAME, SZ) \
1361 XXGENPCV_BE_EXP(NAME, SZ) \
1362 XXGENPCV_BE_COMP(NAME, SZ) \
1363 XXGENPCV_LE_EXP(NAME, SZ) \
1364 XXGENPCV_LE_COMP(NAME, SZ) \
1365
1366 XXGENPCV(XXGENPCVBM, 1)
1367 XXGENPCV(XXGENPCVHM, 2)
1368 XXGENPCV(XXGENPCVWM, 4)
1369 XXGENPCV(XXGENPCVDM, 8)
1370
1371 #undef XXGENPCV_BE_EXP
1372 #undef XXGENPCV_BE_COMP
1373 #undef XXGENPCV_LE_EXP
1374 #undef XXGENPCV_LE_COMP
1375 #undef XXGENPCV
1376
1377 #if HOST_BIG_ENDIAN
1378 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1379 #define VBPERMD_INDEX(i) (i)
1380 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1381 #else
1382 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1383 #define VBPERMD_INDEX(i) (1 - i)
1384 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1385 #endif
1386 #define EXTRACT_BIT(avr, i, index) \
1387 (extract64((avr)->VsrD(i), 63 - index, 1))
1388
1389 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1390 {
1391 int i, j;
1392 ppc_avr_t result = { .u64 = { 0, 0 } };
1393 VECTOR_FOR_INORDER_I(i, u64) {
1394 for (j = 0; j < 8; j++) {
1395 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1396 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1397 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1398 }
1399 }
1400 }
1401 *r = result;
1402 }
1403
1404 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1405 {
1406 int i;
1407 uint64_t perm = 0;
1408
1409 VECTOR_FOR_INORDER_I(i, u8) {
1410 int index = VBPERMQ_INDEX(b, i);
1411
1412 if (index < 128) {
1413 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1414 if (a->u64[VBPERMQ_DW(index)] & mask) {
1415 perm |= (0x8000 >> i);
1416 }
1417 }
1418 }
1419
1420 r->VsrD(0) = perm;
1421 r->VsrD(1) = 0;
1422 }
1423
1424 #undef VBPERMQ_INDEX
1425 #undef VBPERMQ_DW
1426
1427 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1428 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1429 { \
1430 int i, j; \
1431 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1432 \
1433 VECTOR_FOR_INORDER_I(i, srcfld) { \
1434 prod[i] = 0; \
1435 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1436 if (a->srcfld[i] & (1ull << j)) { \
1437 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1438 } \
1439 } \
1440 } \
1441 \
1442 VECTOR_FOR_INORDER_I(i, trgfld) { \
1443 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1444 } \
1445 }
1446
1447 PMSUM(vpmsumb, u8, u16, uint16_t)
1448 PMSUM(vpmsumh, u16, u32, uint32_t)
1449 PMSUM(vpmsumw, u32, u64, uint64_t)
1450
1451 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1452 {
1453 int i, j;
1454 Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
1455
1456 for (j = 0; j < 64; j++) {
1457 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1458 if (a->VsrD(i) & (1ull << j)) {
1459 tmp = int128_make64(b->VsrD(i));
1460 tmp = int128_lshift(tmp, j);
1461 prod[i] = int128_xor(prod[i], tmp);
1462 }
1463 }
1464 }
1465
1466 r->s128 = int128_xor(prod[0], prod[1]);
1467 }
1468
1469 #if HOST_BIG_ENDIAN
1470 #define PKBIG 1
1471 #else
1472 #define PKBIG 0
1473 #endif
1474 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1475 {
1476 int i, j;
1477 ppc_avr_t result;
1478 #if HOST_BIG_ENDIAN
1479 const ppc_avr_t *x[2] = { a, b };
1480 #else
1481 const ppc_avr_t *x[2] = { b, a };
1482 #endif
1483
1484 VECTOR_FOR_INORDER_I(i, u64) {
1485 VECTOR_FOR_INORDER_I(j, u32) {
1486 uint32_t e = x[i]->u32[j];
1487
1488 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1489 ((e >> 6) & 0x3e0) |
1490 ((e >> 3) & 0x1f));
1491 }
1492 }
1493 *r = result;
1494 }
1495
1496 #define VPK(suffix, from, to, cvt, dosat) \
1497 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1498 ppc_avr_t *a, ppc_avr_t *b) \
1499 { \
1500 int i; \
1501 int sat = 0; \
1502 ppc_avr_t result; \
1503 ppc_avr_t *a0 = PKBIG ? a : b; \
1504 ppc_avr_t *a1 = PKBIG ? b : a; \
1505 \
1506 VECTOR_FOR_INORDER_I(i, from) { \
1507 result.to[i] = cvt(a0->from[i], &sat); \
1508 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1509 } \
1510 *r = result; \
1511 if (dosat && sat) { \
1512 set_vscr_sat(env); \
1513 } \
1514 }
1515 #define I(x, y) (x)
1516 VPK(shss, s16, s8, cvtshsb, 1)
1517 VPK(shus, s16, u8, cvtshub, 1)
1518 VPK(swss, s32, s16, cvtswsh, 1)
1519 VPK(swus, s32, u16, cvtswuh, 1)
1520 VPK(sdss, s64, s32, cvtsdsw, 1)
1521 VPK(sdus, s64, u32, cvtsduw, 1)
1522 VPK(uhus, u16, u8, cvtuhub, 1)
1523 VPK(uwus, u32, u16, cvtuwuh, 1)
1524 VPK(udus, u64, u32, cvtuduw, 1)
1525 VPK(uhum, u16, u8, I, 0)
1526 VPK(uwum, u32, u16, I, 0)
1527 VPK(udum, u64, u32, I, 0)
1528 #undef I
1529 #undef VPK
1530 #undef PKBIG
1531
1532 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1533 {
1534 int i;
1535
1536 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1537 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1538 }
1539 }
1540
1541 #define VRFI(suffix, rounding) \
1542 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1543 ppc_avr_t *b) \
1544 { \
1545 int i; \
1546 float_status s = env->vec_status; \
1547 \
1548 set_float_rounding_mode(rounding, &s); \
1549 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1550 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1551 } \
1552 }
1553 VRFI(n, float_round_nearest_even)
1554 VRFI(m, float_round_down)
1555 VRFI(p, float_round_up)
1556 VRFI(z, float_round_to_zero)
1557 #undef VRFI
1558
1559 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1560 {
1561 int i;
1562
1563 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1564 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1565
1566 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1567 }
1568 }
1569
1570 #define VRLMI(name, size, element, insert) \
1571 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1572 { \
1573 int i; \
1574 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1575 uint##size##_t src1 = a->element[i]; \
1576 uint##size##_t src2 = b->element[i]; \
1577 uint##size##_t src3 = r->element[i]; \
1578 uint##size##_t begin, end, shift, mask, rot_val; \
1579 \
1580 shift = extract##size(src2, 0, 6); \
1581 end = extract##size(src2, 8, 6); \
1582 begin = extract##size(src2, 16, 6); \
1583 rot_val = rol##size(src1, shift); \
1584 mask = mask_u##size(begin, end); \
1585 if (insert) { \
1586 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1587 } else { \
1588 r->element[i] = (rot_val & mask); \
1589 } \
1590 } \
1591 }
1592
1593 VRLMI(VRLDMI, 64, u64, 1);
1594 VRLMI(VRLWMI, 32, u32, 1);
1595 VRLMI(VRLDNM, 64, u64, 0);
1596 VRLMI(VRLWNM, 32, u32, 0);
1597
1598 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1599 {
1600 int i;
1601
1602 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1603 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1604 }
1605 }
1606
1607 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1608 {
1609 int i;
1610
1611 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1612 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1613 }
1614 }
1615
1616 #define VEXTU_X_DO(name, size, left) \
1617 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1618 { \
1619 int index = (a & 0xf) * 8; \
1620 if (left) { \
1621 index = 128 - index - size; \
1622 } \
1623 return int128_getlo(int128_rshift(b->s128, index)) & \
1624 MAKE_64BIT_MASK(0, size); \
1625 }
1626 VEXTU_X_DO(vextublx, 8, 1)
1627 VEXTU_X_DO(vextuhlx, 16, 1)
1628 VEXTU_X_DO(vextuwlx, 32, 1)
1629 VEXTU_X_DO(vextubrx, 8, 0)
1630 VEXTU_X_DO(vextuhrx, 16, 0)
1631 VEXTU_X_DO(vextuwrx, 32, 0)
1632 #undef VEXTU_X_DO
1633
1634 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1635 {
1636 int i;
1637 unsigned int shift, bytes, size;
1638
1639 size = ARRAY_SIZE(r->u8);
1640 for (i = 0; i < size; i++) {
1641 shift = b->VsrB(i) & 0x7; /* extract shift value */
1642 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1643 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1644 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1645 }
1646 }
1647
1648 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1649 {
1650 int i;
1651 unsigned int shift, bytes;
1652
1653 /*
1654 * Use reverse order, as the destination and source registers can be the
1655 * same. The register is modified in place, saving a temporary; reverse
1656 * order guarantees that already-computed results are not fed back in.
1657 */
1658 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1659 shift = b->VsrB(i) & 0x7; /* extract shift value */
1660 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1661 /* extract adjacent bytes */
1662 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1663 }
1664 }
1665
1666 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1667 {
1668 int sh = shift & 0xf;
1669 int i;
1670 ppc_avr_t result;
1671
1672 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1673 int index = sh + i;
1674 if (index > 0xf) {
1675 result.VsrB(i) = b->VsrB(index - 0x10);
1676 } else {
1677 result.VsrB(i) = a->VsrB(index);
1678 }
1679 }
1680 *r = result;
1681 }
1682
1683 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1684 {
1685 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1686
1687 #if HOST_BIG_ENDIAN
1688 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1689 memset(&r->u8[16 - sh], 0, sh);
1690 #else
1691 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1692 memset(&r->u8[0], 0, sh);
1693 #endif
1694 }
1695
1696 #if HOST_BIG_ENDIAN
1697 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1698 #else
1699 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1700 #endif
1701
1702 #define VINSX(SUFFIX, TYPE) \
1703 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1704 uint64_t val, target_ulong index) \
1705 { \
1706 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1707 target_long idx = index; \
1708 \
1709 if (idx < 0 || idx > maxidx) { \
1710 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1711 qemu_log_mask(LOG_GUEST_ERROR, \
1712 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1713 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1714 } else { \
1715 TYPE src = val; \
1716 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1717 } \
1718 }
1719 VINSX(B, uint8_t)
1720 VINSX(H, uint16_t)
1721 VINSX(W, uint32_t)
1722 VINSX(D, uint64_t)
1723 #undef ELEM_ADDR
1724 #undef VINSX
1725 #if HOST_BIG_ENDIAN
1726 #define VEXTDVLX(NAME, SIZE) \
1727 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1728 target_ulong index) \
1729 { \
1730 const target_long idx = index; \
1731 ppc_avr_t tmp[2] = { *a, *b }; \
1732 memset(t, 0, sizeof(*t)); \
1733 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1734 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1735 } else { \
1736 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1737 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1738 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1739 } \
1740 }
1741 #else
1742 #define VEXTDVLX(NAME, SIZE) \
1743 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1744 target_ulong index) \
1745 { \
1746 const target_long idx = index; \
1747 ppc_avr_t tmp[2] = { *b, *a }; \
1748 memset(t, 0, sizeof(*t)); \
1749 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1750 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1751 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1752 } else { \
1753 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1754 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1755 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1756 } \
1757 }
1758 #endif
1759 VEXTDVLX(VEXTDUBVLX, 1)
1760 VEXTDVLX(VEXTDUHVLX, 2)
1761 VEXTDVLX(VEXTDUWVLX, 4)
1762 VEXTDVLX(VEXTDDVLX, 8)
1763 #undef VEXTDVLX
1764 #if HOST_BIG_ENDIAN
1765 #define VEXTRACT(suffix, element) \
1766 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1767 { \
1768 uint32_t es = sizeof(r->element[0]); \
1769 memmove(&r->u8[8 - es], &b->u8[index], es); \
1770 memset(&r->u8[8], 0, 8); \
1771 memset(&r->u8[0], 0, 8 - es); \
1772 }
1773 #else
1774 #define VEXTRACT(suffix, element) \
1775 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1776 { \
1777 uint32_t es = sizeof(r->element[0]); \
1778 uint32_t s = (16 - index) - es; \
1779 memmove(&r->u8[8], &b->u8[s], es); \
1780 memset(&r->u8[0], 0, 8); \
1781 memset(&r->u8[8 + es], 0, 8 - es); \
1782 }
1783 #endif
1784 VEXTRACT(ub, u8)
1785 VEXTRACT(uh, u16)
1786 VEXTRACT(uw, u32)
1787 VEXTRACT(d, u64)
1788 #undef VEXTRACT
1789
1790 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1791 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1792 { \
1793 int i, idx, crf = 0; \
1794 \
1795 for (i = 0; i < NUM_ELEMS; i++) { \
1796 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1797 if (b->Vsr##ELEM(idx)) { \
1798 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1799 } else { \
1800 crf = 0b0010; \
1801 break; \
1802 } \
1803 } \
1804 \
1805 for (; i < NUM_ELEMS; i++) { \
1806 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1807 t->Vsr##ELEM(idx) = 0; \
1808 } \
1809 \
1810 return crf; \
1811 }
1812 VSTRI(VSTRIBL, B, 16, true)
1813 VSTRI(VSTRIBR, B, 16, false)
1814 VSTRI(VSTRIHL, H, 8, true)
1815 VSTRI(VSTRIHR, H, 8, false)
1816 #undef VSTRI
1817
1818 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1819 {
1820 ppc_vsr_t t = { };
1821 size_t es = sizeof(uint32_t);
1822 uint32_t ext_index;
1823 int i;
1824
1825 ext_index = index;
1826 for (i = 0; i < es; i++, ext_index++) {
1827 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1828 }
1829
1830 *xt = t;
1831 }
1832
1833 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1834 {
1835 ppc_vsr_t t = *xt;
1836 size_t es = sizeof(uint32_t);
1837 int ins_index, i = 0;
1838
1839 ins_index = index;
1840 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1841 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1842 }
1843
1844 *xt = t;
1845 }
1846
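/*
 * XXEVAL evaluates an arbitrary three-input boolean function of a, b and c,
 * described by the 8-bit immediate as a truth table. In this implementation
 * imm bit 0 corresponds to the conjunction a & b & c and imm bit 7 to
 * ~a & ~b & ~c; e.g. (illustration only) imm = 0x01 computes a & b & c.
 */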
1847 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1848 uint32_t desc)
1849 {
1850 /*
1851 * Instead of processing imm bit-by-bit, we'll skip the computation of
1852 * conjunctions whose corresponding bit is unset.
1853 */
1854 int bit, imm = simd_data(desc);
1855 Int128 conj, disj = int128_zero();
1856
1857 /* Iterate over set bits from the least to the most significant bit */
1858 while (imm) {
1859 /*
1860 * Get the next bit to be processed with ctzl. Invert the result of
1861 * ctzl to match the indexing used by PowerISA.
1862 */
1863 bit = 7 - ctzl(imm);
1864 if (bit & 0x4) {
1865 conj = a->s128;
1866 } else {
1867 conj = int128_not(a->s128);
1868 }
1869 if (bit & 0x2) {
1870 conj = int128_and(conj, b->s128);
1871 } else {
1872 conj = int128_and(conj, int128_not(b->s128));
1873 }
1874 if (bit & 0x1) {
1875 conj = int128_and(conj, c->s128);
1876 } else {
1877 conj = int128_and(conj, int128_not(c->s128));
1878 }
1879 disj = int128_or(disj, conj);
1880
1881 /* Unset the least significant bit that is set */
1882 imm &= imm - 1;
1883 }
1884
1885 t->s128 = disj;
1886 }
1887
1888 #define XXBLEND(name, sz) \
1889 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1890 ppc_avr_t *c, uint32_t desc) \
1891 { \
1892 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1893 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1894 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1895 } \
1896 }
1897 XXBLEND(B, 8)
1898 XXBLEND(H, 16)
1899 XXBLEND(W, 32)
1900 XXBLEND(D, 64)
1901 #undef XXBLEND
1902
1903 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1904 {
1905 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1906
1907 #if HOST_BIG_ENDIAN
1908 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1909 memset(&r->u8[0], 0, sh);
1910 #else
1911 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1912 memset(&r->u8[16 - sh], 0, sh);
1913 #endif
1914 }
1915
1916 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1917 {
1918 int64_t t;
1919 int i, upper;
1920 ppc_avr_t result;
1921 int sat = 0;
1922
1923 upper = ARRAY_SIZE(r->s32) - 1;
1924 t = (int64_t)b->VsrSW(upper);
1925 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1926 t += a->VsrSW(i);
1927 result.VsrSW(i) = 0;
1928 }
1929 result.VsrSW(upper) = cvtsdsw(t, &sat);
1930 *r = result;
1931
1932 if (sat) {
1933 set_vscr_sat(env);
1934 }
1935 }
1936
1937 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1938 {
1939 int i, j, upper;
1940 ppc_avr_t result;
1941 int sat = 0;
1942
1943 upper = 1;
1944 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1945 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1946
1947 result.VsrD(i) = 0;
1948 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1949 t += a->VsrSW(2 * i + j);
1950 }
1951 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1952 }
1953
1954 *r = result;
1955 if (sat) {
1956 set_vscr_sat(env);
1957 }
1958 }
1959
1960 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1961 {
1962 int i, j;
1963 int sat = 0;
1964
1965 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1966 int64_t t = (int64_t)b->s32[i];
1967
1968 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1969 t += a->s8[4 * i + j];
1970 }
1971 r->s32[i] = cvtsdsw(t, &sat);
1972 }
1973
1974 if (sat) {
1975 set_vscr_sat(env);
1976 }
1977 }
1978
1979 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1980 {
1981 int sat = 0;
1982 int i;
1983
1984 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1985 int64_t t = (int64_t)b->s32[i];
1986
1987 t += a->s16[2 * i] + a->s16[2 * i + 1];
1988 r->s32[i] = cvtsdsw(t, &sat);
1989 }
1990
1991 if (sat) {
1992 set_vscr_sat(env);
1993 }
1994 }
1995
1996 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1997 {
1998 int i, j;
1999 int sat = 0;
2000
2001 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2002 uint64_t t = (uint64_t)b->u32[i];
2003
2004 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2005 t += a->u8[4 * i + j];
2006 }
2007 r->u32[i] = cvtuduw(t, &sat);
2008 }
2009
2010 if (sat) {
2011 set_vscr_sat(env);
2012 }
2013 }
2014
2015 #if HOST_BIG_ENDIAN
2016 #define UPKHI 1
2017 #define UPKLO 0
2018 #else
2019 #define UPKHI 0
2020 #define UPKLO 1
2021 #endif
2022 #define VUPKPX(suffix, hi) \
2023 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2024 { \
2025 int i; \
2026 ppc_avr_t result; \
2027 \
2028 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2029 uint16_t e = b->u16[hi ? i : i + 4]; \
2030 uint8_t a = (e >> 15) ? 0xff : 0; \
2031 uint8_t r = (e >> 10) & 0x1f; \
2032 uint8_t g = (e >> 5) & 0x1f; \
2033 uint8_t b = e & 0x1f; \
2034 \
2035 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2036 } \
2037 *r = result; \
2038 }
2039 VUPKPX(lpx, UPKLO)
2040 VUPKPX(hpx, UPKHI)
2041 #undef VUPKPX
2042
2043 #define VUPK(suffix, unpacked, packee, hi) \
2044 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2045 { \
2046 int i; \
2047 ppc_avr_t result; \
2048 \
2049 if (hi) { \
2050 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2051 result.unpacked[i] = b->packee[i]; \
2052 } \
2053 } else { \
2054 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2055 i++) { \
2056 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2057 } \
2058 } \
2059 *r = result; \
2060 }
2061 VUPK(hsb, s16, s8, UPKHI)
2062 VUPK(hsh, s32, s16, UPKHI)
2063 VUPK(hsw, s64, s32, UPKHI)
2064 VUPK(lsb, s16, s8, UPKLO)
2065 VUPK(lsh, s32, s16, UPKLO)
2066 VUPK(lsw, s64, s32, UPKLO)
2067 #undef VUPK
2068 #undef UPKHI
2069 #undef UPKLO
2070
2071 #define VGENERIC_DO(name, element) \
2072 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2073 { \
2074 int i; \
2075 \
2076 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2077 r->element[i] = name(b->element[i]); \
2078 } \
2079 }
2080
2081 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2082 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2083
2084 VGENERIC_DO(clzb, u8)
2085 VGENERIC_DO(clzh, u16)
2086
2087 #undef clzb
2088 #undef clzh
2089
2090 #define ctzb(v) ((v) ? ctz32(v) : 8)
2091 #define ctzh(v) ((v) ? ctz32(v) : 16)
2092 #define ctzw(v) ctz32((v))
2093 #define ctzd(v) ctz64((v))
2094
2095 VGENERIC_DO(ctzb, u8)
2096 VGENERIC_DO(ctzh, u16)
2097 VGENERIC_DO(ctzw, u32)
2098 VGENERIC_DO(ctzd, u64)
2099
2100 #undef ctzb
2101 #undef ctzh
2102 #undef ctzw
2103 #undef ctzd
2104
2105 #define popcntb(v) ctpop8(v)
2106 #define popcnth(v) ctpop16(v)
2107 #define popcntw(v) ctpop32(v)
2108 #define popcntd(v) ctpop64(v)
2109
2110 VGENERIC_DO(popcntb, u8)
2111 VGENERIC_DO(popcnth, u16)
2112 VGENERIC_DO(popcntw, u32)
2113 VGENERIC_DO(popcntd, u64)
2114
2115 #undef popcntb
2116 #undef popcnth
2117 #undef popcntw
2118 #undef popcntd
2119
2120 #undef VGENERIC_DO
2121
2122 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2123 {
2124 r->s128 = int128_add(a->s128, b->s128);
2125 }
2126
2127 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2128 {
2129 r->s128 = int128_add(int128_add(a->s128, b->s128),
2130 int128_make64(int128_getlo(c->s128) & 1));
2131 }
2132
2133 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2134 {
2135 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
2136 r->VsrD(0) = 0;
2137 }
2138
2139 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2140 {
2141 bool carry_out = int128_ult(int128_not(a->s128), b->s128),
2142 carry_in = int128_getlo(c->s128) & 1;
2143
2144 if (!carry_out && carry_in) {
2145 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
2146 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
2147 }
2148
2149 r->VsrD(0) = 0;
2150 r->VsrD(1) = carry_out;
2151 }
2152
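/*
 * Quadword subtract helpers.  VSUBEUQM computes a + ~b + carry-in, so
 * a - b is obtained with a carry-in of one.  VSUBCUQ/VSUBECUQ return
 * the carry-out of that sum, i.e. 1 when no borrow occurs (a >= b for
 * the plain subtract).
 */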
2153 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2154 {
2155 r->s128 = int128_sub(a->s128, b->s128);
2156 }
2157
2158 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2159 {
2160 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
2161 int128_make64(int128_getlo(c->s128) & 1));
2162 }
2163
2164 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2165 {
2166 Int128 tmp = int128_not(b->s128);
2167
2168 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
2169 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
2170 r->VsrD(0) = 0;
2171 }
2172
2173 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2174 {
2175 Int128 tmp = int128_not(b->s128);
2176 bool carry_out = int128_ult(int128_not(a->s128), tmp),
2177 carry_in = int128_getlo(c->s128) & 1;
2178
2179 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
2180 int128_makes64(-1)));
2181 r->VsrD(0) = 0;
2182 }
2183
2184 #define BCD_PLUS_PREF_1 0xC
2185 #define BCD_PLUS_PREF_2 0xF
2186 #define BCD_PLUS_ALT_1 0xA
2187 #define BCD_NEG_PREF 0xD
2188 #define BCD_NEG_ALT 0xB
2189 #define BCD_PLUS_ALT_2 0xE
2190 #define NATIONAL_PLUS 0x2B
2191 #define NATIONAL_NEG 0x2D
2192
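/*
 * Signed packed-decimal (BCD) helpers.  A quadword holds 31 decimal
 * digits of four bits each, most significant digit first, with the
 * sign code in the least significant nibble: 0xA/0xC/0xE/0xF mean
 * plus, 0xB/0xD mean minus, and anything else is invalid.  Digit n
 * (n = 0 being the sign nibble) lives in byte BCD_DIG_BYTE(n) of the
 * big-endian VsrB() view.  For example, +123 is encoded as
 * 0x0...0123C using the preferred positive sign code 0xC.
 */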
2193 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2194
2195 static int bcd_get_sgn(ppc_avr_t *bcd)
2196 {
2197 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2198 case BCD_PLUS_PREF_1:
2199 case BCD_PLUS_PREF_2:
2200 case BCD_PLUS_ALT_1:
2201 case BCD_PLUS_ALT_2:
2202 {
2203 return 1;
2204 }
2205
2206 case BCD_NEG_PREF:
2207 case BCD_NEG_ALT:
2208 {
2209 return -1;
2210 }
2211
2212 default:
2213 {
2214 return 0;
2215 }
2216 }
2217 }
2218
2219 static int bcd_preferred_sgn(int sgn, int ps)
2220 {
2221 if (sgn >= 0) {
2222 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2223 } else {
2224 return BCD_NEG_PREF;
2225 }
2226 }
2227
2228 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2229 {
2230 uint8_t result;
2231 if (n & 1) {
2232 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2233 } else {
2234 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2235 }
2236
2237 if (unlikely(result > 9)) {
2238 *invalid = true;
2239 }
2240 return result;
2241 }
2242
2243 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2244 {
2245 if (n & 1) {
2246 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2247 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2248 } else {
2249 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2250 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2251 }
2252 }
2253
2254 static bool bcd_is_valid(ppc_avr_t *bcd)
2255 {
2256 int i;
2257 int invalid = 0;
2258
2259 if (bcd_get_sgn(bcd) == 0) {
2260 return false;
2261 }
2262
2263 for (i = 1; i < 32; i++) {
2264 bcd_get_digit(bcd, i, &invalid);
2265 if (unlikely(invalid)) {
2266 return false;
2267 }
2268 }
2269 return true;
2270 }
2271
2272 static int bcd_cmp_zero(ppc_avr_t *bcd)
2273 {
2274 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2275 return CRF_EQ;
2276 } else {
2277 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2278 }
2279 }
2280
2281 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2282 {
2283 return reg->VsrH(7 - n);
2284 }
2285
2286 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2287 {
2288 reg->VsrH(7 - n) = val;
2289 }
2290
2291 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2292 {
2293 int i;
2294 int invalid = 0;
2295 for (i = 31; i > 0; i--) {
2296 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2297 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2298 if (unlikely(invalid)) {
2299 return 0; /* doesn't matter */
2300 } else if (dig_a > dig_b) {
2301 return 1;
2302 } else if (dig_a < dig_b) {
2303 return -1;
2304 }
2305 }
2306
2307 return 0;
2308 }
2309
2310 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2311 int *overflow)
2312 {
2313 int carry = 0;
2314 int i;
2315 int is_zero = 1;
2316
2317 for (i = 1; i <= 31; i++) {
2318 uint8_t digit = bcd_get_digit(a, i, invalid) +
2319 bcd_get_digit(b, i, invalid) + carry;
2320 is_zero &= (digit == 0);
2321 if (digit > 9) {
2322 carry = 1;
2323 digit -= 10;
2324 } else {
2325 carry = 0;
2326 }
2327
2328 bcd_put_digit(t, digit, i);
2329 }
2330
2331 *overflow = carry;
2332 return is_zero;
2333 }
2334
2335 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2336 int *overflow)
2337 {
2338 int carry = 0;
2339 int i;
2340
2341 for (i = 1; i <= 31; i++) {
2342 uint8_t digit = bcd_get_digit(a, i, invalid) -
2343 bcd_get_digit(b, i, invalid) + carry;
2344 if (digit & 0x80) {
2345 carry = -1;
2346 digit += 10;
2347 } else {
2348 carry = 0;
2349 }
2350
2351 bcd_put_digit(t, digit, i);
2352 }
2353
2354 *overflow = carry;
2355 }
2356
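/*
 * bcdadd./bcdsub.: signed BCD add/subtract.  The return value is the
 * CR field to set: LT/GT/EQ reflect the sign of the result and SO is
 * set on overflow or on an invalid source encoding (in which case the
 * result is forced to all ones).
 */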
2357 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2358 {
2359
2360 int sgna = bcd_get_sgn(a);
2361 int sgnb = bcd_get_sgn(b);
2362 int invalid = (sgna == 0) || (sgnb == 0);
2363 int overflow = 0;
2364 int zero = 0;
2365 uint32_t cr = 0;
2366 ppc_avr_t result = { .u64 = { 0, 0 } };
2367
2368 if (!invalid) {
2369 if (sgna == sgnb) {
2370 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2371 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2372 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2373 } else {
2374 int magnitude = bcd_cmp_mag(a, b);
2375 if (magnitude > 0) {
2376 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2377 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2378 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2379 } else if (magnitude < 0) {
2380 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2381 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2382 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2383 } else {
2384 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2385 cr = CRF_EQ;
2386 }
2387 }
2388 }
2389
2390 if (unlikely(invalid)) {
2391 result.VsrD(0) = result.VsrD(1) = -1;
2392 cr = CRF_SO;
2393 } else if (overflow) {
2394 cr |= CRF_SO;
2395 } else if (zero) {
2396 cr |= CRF_EQ;
2397 }
2398
2399 *r = result;
2400
2401 return cr;
2402 }
2403
2404 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2405 {
2406 ppc_avr_t bcopy = *b;
2407 int sgnb = bcd_get_sgn(b);
2408 if (sgnb < 0) {
2409 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2410 } else if (sgnb > 0) {
2411 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2412 }
2413 /* else invalid ... defer to bcdadd code for proper handling */
2414
2415 return helper_bcdadd(r, a, &bcopy, ps);
2416 }
2417
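/*
 * bcdcfn.: convert from national format to signed BCD.  The source
 * holds seven 16-bit decimal digits (0x0030..0x0039) with the sign
 * code (0x002B '+' or 0x002D '-') in the least significant halfword;
 * any other digit or sign value reports SO.
 */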
2418 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2419 {
2420 int i;
2421 int cr = 0;
2422 uint16_t national = 0;
2423 uint16_t sgnb = get_national_digit(b, 0);
2424 ppc_avr_t ret = { .u64 = { 0, 0 } };
2425 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2426
2427 for (i = 1; i < 8; i++) {
2428 national = get_national_digit(b, i);
2429 if (unlikely(national < 0x30 || national > 0x39)) {
2430 invalid = 1;
2431 break;
2432 }
2433
2434 bcd_put_digit(&ret, national & 0xf, i);
2435 }
2436
2437 if (sgnb == NATIONAL_PLUS) {
2438 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2439 } else {
2440 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2441 }
2442
2443 cr = bcd_cmp_zero(&ret);
2444
2445 if (unlikely(invalid)) {
2446 cr = CRF_SO;
2447 }
2448
2449 *r = ret;
2450
2451 return cr;
2452 }
2453
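/*
 * bcdctn.: convert a signed BCD value to national format (the inverse
 * of bcdcfn above).  SO is reported if the source is invalid or has
 * more than seven significant digits.
 */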
2454 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2455 {
2456 int i;
2457 int cr = 0;
2458 int sgnb = bcd_get_sgn(b);
2459 int invalid = (sgnb == 0);
2460 ppc_avr_t ret = { .u64 = { 0, 0 } };
2461
2462 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2463
2464 for (i = 1; i < 8; i++) {
2465 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2466
2467 if (unlikely(invalid)) {
2468 break;
2469 }
2470 }
2471 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2472
2473 cr = bcd_cmp_zero(b);
2474
2475 if (ox_flag) {
2476 cr |= CRF_SO;
2477 }
2478
2479 if (unlikely(invalid)) {
2480 cr = CRF_SO;
2481 }
2482
2483 *r = ret;
2484
2485 return cr;
2486 }
2487
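/*
 * bcdcfz.: convert from zoned format, one decimal digit per byte with
 * the zone nibble 0x3 (PS = 0) or 0xF (PS = 1).  The sign is taken
 * from the zone nibble of the least significant byte; any other zone
 * or digit value reports SO.
 */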
2488 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2489 {
2490 int i;
2491 int cr = 0;
2492 int invalid = 0;
2493 int zone_digit = 0;
2494 int zone_lead = ps ? 0xF : 0x3;
2495 int digit = 0;
2496 ppc_avr_t ret = { .u64 = { 0, 0 } };
2497 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2498
2499 if (unlikely((sgnb < 0xA) && ps)) {
2500 invalid = 1;
2501 }
2502
2503 for (i = 0; i < 16; i++) {
2504 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2505 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2506 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2507 invalid = 1;
2508 break;
2509 }
2510
2511 bcd_put_digit(&ret, digit, i + 1);
2512 }
2513
2514 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2515 (!ps && (sgnb & 0x4))) {
2516 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2517 } else {
2518 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2519 }
2520
2521 cr = bcd_cmp_zero(&ret);
2522
2523 if (unlikely(invalid)) {
2524 cr = CRF_SO;
2525 }
2526
2527 *r = ret;
2528
2529 return cr;
2530 }
2531
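/*
 * bcdctz.: convert a signed BCD value to zoned format (the inverse of
 * bcdcfz above), encoding the sign in the zone nibble of the least
 * significant byte.  SO is reported if the source is invalid or has
 * more than sixteen significant digits.
 */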
2532 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2533 {
2534 int i;
2535 int cr = 0;
2536 uint8_t digit = 0;
2537 int sgnb = bcd_get_sgn(b);
2538 int zone_lead = (ps) ? 0xF0 : 0x30;
2539 int invalid = (sgnb == 0);
2540 ppc_avr_t ret = { .u64 = { 0, 0 } };
2541
2542 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2543
2544 for (i = 0; i < 16; i++) {
2545 digit = bcd_get_digit(b, i + 1, &invalid);
2546
2547 if (unlikely(invalid)) {
2548 break;
2549 }
2550
2551 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2552 }
2553
2554 if (ps) {
2555 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2556 } else {
2557 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2558 }
2559
2560 cr = bcd_cmp_zero(b);
2561
2562 if (ox_flag) {
2563 cr |= CRF_SO;
2564 }
2565
2566 if (unlikely(invalid)) {
2567 cr = CRF_SO;
2568 }
2569
2570 *r = ret;
2571
2572 return cr;
2573 }
2574
2575 /**
2576 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2577 *
2578 * Returns:
2579 * > 0 if ahi|alo > bhi|blo,
2580 * 0 if ahi|alo == bhi|blo,
2581 * < 0 if ahi|alo < bhi|blo
2582 */
2583 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2584 uint64_t blo, uint64_t bhi)
2585 {
2586 return (ahi == bhi) ?
2587 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2588 (ahi > bhi ? 1 : -1);
2589 }
2590
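/*
 * bcdcfsq.: convert a signed 128-bit binary integer to signed BCD.
 * The magnitude is split with a single division by 10^15 so that both
 * the quotient and the remainder fit in 64 bits, and the digits are
 * then peeled off with 64-bit arithmetic.
 */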
2591 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2592 {
2593 int i;
2594 int cr;
2595 uint64_t lo_value;
2596 uint64_t hi_value;
2597 uint64_t rem;
2598 ppc_avr_t ret = { .u64 = { 0, 0 } };
2599
2600 if (b->VsrSD(0) < 0) {
2601 lo_value = -b->VsrSD(1);
2602 hi_value = ~b->VsrD(0) + !lo_value;
2603 bcd_put_digit(&ret, 0xD, 0);
2604
2605 cr = CRF_LT;
2606 } else {
2607 lo_value = b->VsrD(1);
2608 hi_value = b->VsrD(0);
2609 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2610
2611 if (hi_value == 0 && lo_value == 0) {
2612 cr = CRF_EQ;
2613 } else {
2614 cr = CRF_GT;
2615 }
2616 }
2617
2618 /*
2619 * Check src limits: abs(src) <= 10^31 - 1
2620 *
2621 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2622 */
2623 if (ucmp128(lo_value, hi_value,
2624 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2625 cr |= CRF_SO;
2626
2627 /*
2628 * According to the ISA, if src wouldn't fit in the destination
2629 * register, the result is undefined.
2630 * In that case, we leave r unchanged.
2631 */
2632 } else {
2633 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2634
2635 for (i = 1; i < 16; rem /= 10, i++) {
2636 bcd_put_digit(&ret, rem % 10, i);
2637 }
2638
2639 for (; i < 32; lo_value /= 10, i++) {
2640 bcd_put_digit(&ret, lo_value % 10, i);
2641 }
2642
2643 *r = ret;
2644 }
2645
2646 return cr;
2647 }
2648
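/*
 * bcdctsq.: convert a signed BCD value to a signed 128-bit binary
 * integer, accumulating digits with a 128-bit multiply-by-ten
 * (Horner's method) and negating the result for a minus sign.
 */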
2649 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2650 {
2651 uint8_t i;
2652 int cr;
2653 uint64_t carry;
2654 uint64_t unused;
2655 uint64_t lo_value;
2656 uint64_t hi_value = 0;
2657 int sgnb = bcd_get_sgn(b);
2658 int invalid = (sgnb == 0);
2659
2660 lo_value = bcd_get_digit(b, 31, &invalid);
2661 for (i = 30; i > 0; i--) {
2662 mulu64(&lo_value, &carry, lo_value, 10ULL);
2663 mulu64(&hi_value, &unused, hi_value, 10ULL);
2664 lo_value += bcd_get_digit(b, i, &invalid);
2665 hi_value += carry;
2666
2667 if (unlikely(invalid)) {
2668 break;
2669 }
2670 }
2671
2672 if (sgnb == -1) {
2673 r->VsrSD(1) = -lo_value;
2674 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2675 } else {
2676 r->VsrSD(1) = lo_value;
2677 r->VsrSD(0) = hi_value;
2678 }
2679
2680 cr = bcd_cmp_zero(b);
2681
2682 if (unlikely(invalid)) {
2683 cr = CRF_SO;
2684 }
2685
2686 return cr;
2687 }
2688
2689 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2690 {
2691 int i;
2692 int invalid = 0;
2693
2694 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2695 return CRF_SO;
2696 }
2697
2698 *r = *a;
2699 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2700
2701 for (i = 1; i < 32; i++) {
2702 bcd_get_digit(a, i, &invalid);
2703 bcd_get_digit(b, i, &invalid);
2704 if (unlikely(invalid)) {
2705 return CRF_SO;
2706 }
2707 }
2708
2709 return bcd_cmp_zero(r);
2710 }
2711
2712 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2713 {
2714 int sgnb = bcd_get_sgn(b);
2715
2716 *r = *b;
2717 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2718
2719 if (bcd_is_valid(b) == false) {
2720 return CRF_SO;
2721 }
2722
2723 return bcd_cmp_zero(r);
2724 }
2725
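/*
 * bcds.: decimal shift.  The signed shift count in byte 7 of a
 * (clamped to +/-31 digits) shifts the digits of b left (positive) or
 * right (negative) by whole nibbles, preserving the sign code; SO is
 * reported when significant digits are lost on a left shift.
 * helper_bcdus below is the unsigned variant, which treats all 32
 * nibbles as digits.
 */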
2726 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2727 {
2728 int cr;
2729 int i = a->VsrSB(7);
2730 bool ox_flag = false;
2731 int sgnb = bcd_get_sgn(b);
2732 ppc_avr_t ret = *b;
2733 ret.VsrD(1) &= ~0xf;
2734
2735 if (bcd_is_valid(b) == false) {
2736 return CRF_SO;
2737 }
2738
2739 if (unlikely(i > 31)) {
2740 i = 31;
2741 } else if (unlikely(i < -31)) {
2742 i = -31;
2743 }
2744
2745 if (i > 0) {
2746 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2747 } else {
2748 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2749 }
2750 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2751
2752 *r = ret;
2753
2754 cr = bcd_cmp_zero(r);
2755 if (ox_flag) {
2756 cr |= CRF_SO;
2757 }
2758
2759 return cr;
2760 }
2761
2762 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2763 {
2764 int cr;
2765 int i;
2766 int invalid = 0;
2767 bool ox_flag = false;
2768 ppc_avr_t ret = *b;
2769
2770 for (i = 0; i < 32; i++) {
2771 bcd_get_digit(b, i, &invalid);
2772
2773 if (unlikely(invalid)) {
2774 return CRF_SO;
2775 }
2776 }
2777
2778 i = a->VsrSB(7);
2779 if (i >= 32) {
2780 ox_flag = true;
2781 ret.VsrD(1) = ret.VsrD(0) = 0;
2782 } else if (i <= -32) {
2783 ret.VsrD(1) = ret.VsrD(0) = 0;
2784 } else if (i > 0) {
2785 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2786 } else {
2787 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2788 }
2789 *r = ret;
2790
2791 cr = bcd_cmp_zero(r);
2792 if (ox_flag) {
2793 cr |= CRF_SO;
2794 }
2795
2796 return cr;
2797 }
2798
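/*
 * bcdsr.: decimal shift and round.  Like bcds., but a right shift
 * rounds the result up by one when the most significant discarded
 * digit is five or greater.
 */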
2799 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2800 {
2801 int cr;
2802 int unused = 0;
2803 int invalid = 0;
2804 bool ox_flag = false;
2805 int sgnb = bcd_get_sgn(b);
2806 ppc_avr_t ret = *b;
2807 ret.VsrD(1) &= ~0xf;
2808
2809 int i = a->VsrSB(7);
2810 ppc_avr_t bcd_one;
2811
2812 bcd_one.VsrD(0) = 0;
2813 bcd_one.VsrD(1) = 0x10;
2814
2815 if (bcd_is_valid(b) == false) {
2816 return CRF_SO;
2817 }
2818
2819 if (unlikely(i > 31)) {
2820 i = 31;
2821 } else if (unlikely(i < -31)) {
2822 i = -31;
2823 }
2824
2825 if (i > 0) {
2826 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2827 } else {
2828 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2829
2830 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2831 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2832 }
2833 }
2834 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2835
2836 cr = bcd_cmp_zero(&ret);
2837 if (ox_flag) {
2838 cr |= CRF_SO;
2839 }
2840 *r = ret;
2841
2842 return cr;
2843 }
2844
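/*
 * bcdtrunc./bcdutrunc.: truncate a signed (respectively unsigned) BCD
 * value to the number of least significant digits taken from halfword
 * element 3 of a, reporting SO if any nonzero digit is discarded.
 */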
2845 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2846 {
2847 uint64_t mask;
2848 uint32_t ox_flag = 0;
2849 int i = a->VsrSH(3) + 1;
2850 ppc_avr_t ret = *b;
2851
2852 if (bcd_is_valid(b) == false) {
2853 return CRF_SO;
2854 }
2855
2856 if (i > 16 && i < 32) {
2857 mask = (uint64_t)-1 >> (128 - i * 4);
2858 if (ret.VsrD(0) & ~mask) {
2859 ox_flag = CRF_SO;
2860 }
2861
2862 ret.VsrD(0) &= mask;
2863 } else if (i >= 0 && i <= 16) {
2864 mask = (uint64_t)-1 >> (64 - i * 4);
2865 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2866 ox_flag = CRF_SO;
2867 }
2868
2869 ret.VsrD(1) &= mask;
2870 ret.VsrD(0) = 0;
2871 }
2872 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2873 *r = ret;
2874
2875 return bcd_cmp_zero(&ret) | ox_flag;
2876 }
2877
2878 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2879 {
2880 int i;
2881 uint64_t mask;
2882 uint32_t ox_flag = 0;
2883 int invalid = 0;
2884 ppc_avr_t ret = *b;
2885
2886 for (i = 0; i < 32; i++) {
2887 bcd_get_digit(b, i, &invalid);
2888
2889 if (unlikely(invalid)) {
2890 return CRF_SO;
2891 }
2892 }
2893
2894 i = a->VsrSH(3);
2895 if (i > 16 && i < 33) {
2896 mask = (uint64_t)-1 >> (128 - i * 4);
2897 if (ret.VsrD(0) & ~mask) {
2898 ox_flag = CRF_SO;
2899 }
2900
2901 ret.VsrD(0) &= mask;
2902 } else if (i > 0 && i <= 16) {
2903 mask = (uint64_t)-1 >> (64 - i * 4);
2904 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2905 ox_flag = CRF_SO;
2906 }
2907
2908 ret.VsrD(1) &= mask;
2909 ret.VsrD(0) = 0;
2910 } else if (i == 0) {
2911 if (ret.VsrD(0) || ret.VsrD(1)) {
2912 ox_flag = CRF_SO;
2913 }
2914 ret.VsrD(0) = ret.VsrD(1) = 0;
2915 }
2916
2917 *r = ret;
2918 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2919 return ox_flag | CRF_EQ;
2920 }
2921
2922 return ox_flag | CRF_GT;
2923 }
2924
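/*
 * vsbox: apply the AES S-box (the SubBytes step) to each byte of a.
 */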
2925 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2926 {
2927 int i;
2928 VECTOR_FOR_INORDER_I(i, u8) {
2929 r->u8[i] = AES_sbox[a->u8[i]];
2930 }
2931 }
2932
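/*
 * vcipher performs one full AES encryption round: the shifted byte
 * selection implements ShiftRows, the AES_Te tables combine SubBytes
 * with MixColumns, and the XOR with b is AddRoundKey.  vcipherlast is
 * the final round (no MixColumns), and vncipher/vncipherlast below are
 * the inverse-cipher counterparts.
 */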
2933 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2934 {
2935 ppc_avr_t result;
2936 int i;
2937
2938 VECTOR_FOR_INORDER_I(i, u32) {
2939 result.VsrW(i) = b->VsrW(i) ^
2940 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2941 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2942 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2943 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2944 }
2945 *r = result;
2946 }
2947
2948 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2949 {
2950 ppc_avr_t result;
2951 int i;
2952
2953 VECTOR_FOR_INORDER_I(i, u8) {
2954 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2955 }
2956 *r = result;
2957 }
2958
2959 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2960 {
2961 /* This differs from what is written in ISA V2.07. The RTL is
2962 * incorrect and will be fixed in V2.07B. */
2963 int i;
2964 ppc_avr_t tmp;
2965
2966 VECTOR_FOR_INORDER_I(i, u8) {
2967 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2968 }
2969
2970 VECTOR_FOR_INORDER_I(i, u32) {
2971 r->VsrW(i) =
2972 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2973 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2974 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2975 AES_imc[tmp.VsrB(4 * i + 3)][3];
2976 }
2977 }
2978
2979 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2980 {
2981 ppc_avr_t result;
2982 int i;
2983
2984 VECTOR_FOR_INORDER_I(i, u8) {
2985 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2986 }
2987 *r = result;
2988 }
2989
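/*
 * vshasigmaw computes one of the four SHA-256 sigma functions for each
 * word element: ST = 0 selects the lower-case pair
 * sigma0 = ROTR7 ^ ROTR18 ^ SHR3 and sigma1 = ROTR17 ^ ROTR19 ^ SHR10,
 * ST = 1 the upper-case Sigma0 = ROTR2 ^ ROTR13 ^ ROTR22 and
 * Sigma1 = ROTR6 ^ ROTR11 ^ ROTR25; the per-element SIX bit picks
 * between the two functions of the selected pair.
 */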
2990 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2991 {
2992 int st = (st_six & 0x10) != 0;
2993 int six = st_six & 0xF;
2994 int i;
2995
2996 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2997 if (st == 0) {
2998 if ((six & (0x8 >> i)) == 0) {
2999 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3000 ror32(a->VsrW(i), 18) ^
3001 (a->VsrW(i) >> 3);
3002 } else { /* six.bit[i] == 1 */
3003 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3004 ror32(a->VsrW(i), 19) ^
3005 (a->VsrW(i) >> 10);
3006 }
3007 } else { /* st == 1 */
3008 if ((six & (0x8 >> i)) == 0) {
3009 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3010 ror32(a->VsrW(i), 13) ^
3011 ror32(a->VsrW(i), 22);
3012 } else { /* six.bit[i] == 1 */
3013 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3014 ror32(a->VsrW(i), 11) ^
3015 ror32(a->VsrW(i), 25);
3016 }
3017 }
3018 }
3019 }
3020
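/*
 * vshasigmad is the SHA-512 counterpart, per doubleword:
 * sigma0 = ROTR1 ^ ROTR8 ^ SHR7, sigma1 = ROTR19 ^ ROTR61 ^ SHR6,
 * Sigma0 = ROTR28 ^ ROTR34 ^ ROTR39, Sigma1 = ROTR14 ^ ROTR18 ^ ROTR41.
 */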
3021 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3022 {
3023 int st = (st_six & 0x10) != 0;
3024 int six = st_six & 0xF;
3025 int i;
3026
3027 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3028 if (st == 0) {
3029 if ((six & (0x8 >> (2 * i))) == 0) {
3030 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3031 ror64(a->VsrD(i), 8) ^
3032 (a->VsrD(i) >> 7);
3033 } else { /* six.bit[2*i] == 1 */
3034 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3035 ror64(a->VsrD(i), 61) ^
3036 (a->VsrD(i) >> 6);
3037 }
3038 } else { /* st == 1 */
3039 if ((six & (0x8 >> (2 * i))) == 0) {
3040 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3041 ror64(a->VsrD(i), 34) ^
3042 ror64(a->VsrD(i), 39);
3043 } else { /* six.bit[2*i] == 1 */
3044 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3045 ror64(a->VsrD(i), 18) ^
3046 ror64(a->VsrD(i), 41);
3047 }
3048 }
3049 }
3050 }
3051
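/*
 * vpermxor: each result byte is the XOR of one byte of a and one byte
 * of b, selected by the high and low nibble respectively of the
 * corresponding byte of c.
 */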
3052 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3053 {
3054 ppc_avr_t result;
3055 int i;
3056
3057 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3058 int indexA = c->VsrB(i) >> 4;
3059 int indexB = c->VsrB(i) & 0xF;
3060
3061 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3062 }
3063 *r = result;
3064 }
3065
3066 #undef VECTOR_FOR_INORDER_I
3067
3068 /*****************************************************************************/
3069 /* SPE extension helpers */
3070 /* Use a table to make this quicker */
3071 static const uint8_t hbrev[16] = {
3072 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3073 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3074 };
3075
3076 static inline uint8_t byte_reverse(uint8_t val)
3077 {
3078 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3079 }
3080
3081 static inline uint32_t word_reverse(uint32_t val)
3082 {
3083 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3084 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3085 }
3086
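/*
 * brinc: bit-reversed increment, used by SPE code for FFT address
 * generation.  The low MASKBITS bits of arg1 selected by the mask in
 * arg2 are incremented in bit-reversed order; bits outside the mask
 * pass through unchanged.
 */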
3087 #define MASKBITS 16 /* Arbitrary choice; the real mask width is implementation dependent */
3088 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3089 {
3090 uint32_t a, b, d, mask;
3091
3092 mask = UINT32_MAX >> (32 - MASKBITS);
3093 a = arg1 & mask;
3094 b = arg2 & mask;
3095 d = word_reverse(1 + word_reverse(a | ~b));
3096 return (arg1 & ~mask) | (d & b);
3097 }
3098
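/*
 * cntlsw32 counts the leading bits that are copies of the sign bit,
 * cntlzw32 the leading zero bits; both back the SPE count-leading-bits
 * instructions.
 */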
3099 uint32_t helper_cntlsw32(uint32_t val)
3100 {
3101 if (val & 0x80000000) {
3102 return clz32(~val);
3103 } else {
3104 return clz32(val);
3105 }
3106 }
3107
3108 uint32_t helper_cntlzw32(uint32_t val)
3109 {
3110 return clz32(val);
3111 }
3112
3113 /* 440 specific */
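/*
 * dlmzb: determine leftmost zero byte in the eight bytes formed by
 * high:low.  The returned count (1..8, with 8 also meaning no zero
 * byte was found) is written to the low bits of XER, and CR0
 * optionally records whether the zero byte was in the high word, the
 * low word, or absent.
 */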
3114 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3115 target_ulong low, uint32_t update_Rc)
3116 {
3117 target_ulong mask;
3118 int i;
3119
3120 i = 1;
3121 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3122 if ((high & mask) == 0) {
3123 if (update_Rc) {
3124 env->crf[0] = 0x4;
3125 }
3126 goto done;
3127 }
3128 i++;
3129 }
3130 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3131 if ((low & mask) == 0) {
3132 if (update_Rc) {
3133 env->crf[0] = 0x8;
3134 }
3135 goto done;
3136 }
3137 i++;
3138 }
3139 i = 8;
3140 if (update_Rc) {
3141 env->crf[0] = 0x2;
3142 }
3143 done:
3144 env->xer = (env->xer & ~0x7F) | i;
3145 if (update_Rc) {
3146 env->crf[0] |= xer_so;
3147 }
3148 return i;
3149 }