target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
32
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
36
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 {
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
43 }
44 }
45
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48 {
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
68 }
69
70 return (target_ulong)rt;
71 }
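/*
 * Worked example: with RA = 1 and RB = 2 the dividend is 1 << 32 =
 * 0x100000000 and the divisor is 2, so RT = 0x80000000, which fits in
 * 32 bits and no overflow is flagged.  With RA = 1 and RB = 1 the quotient
 * would exceed UINT32_MAX, so RT is set to 0 and, if OE is set, OV/SO are
 * updated.
 */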
72
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75 {
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
96 }
97
98 return (target_ulong)rt;
99 }
100
101 #if defined(TARGET_PPC64)
102
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 {
105 uint64_t rt = 0;
106 int overflow = 0;
107
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
113 }
114
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
117 }
118
119 return rt;
120 }
121
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 {
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
128
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
134 }
135
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
138 }
139
140 return rt;
141 }
142
143 #endif
144
145
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
150 /*
151  * Subtract 1 from each byte, AND with the inverse of the original value,
152  * and check whether the MSB of each byte is set, e.g. for a zero byte:
153  * ((0x00 - 0x01) & ~(0x00)) & 0x80
154  * = (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 */
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
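/*
 * Worked example: with ra = 0x55 and rb = 0x1122334455667788,
 * rb ^ pattern(0x55) = 0x44776611003322dd.  The byte that held 0x55 is now
 * zero, haszero() flags it via its 0x80 marker bit, and helper_cmpeqb()
 * below returns CRF_GT.
 */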
160
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 {
163 return hasvalue(rb, ra) ? CRF_GT : 0;
164 }
165
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
169
170 /*
171  * Return a random number, for the darn instruction.
172 */
173 uint64_t helper_darn32(void)
174 {
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
184
185 return ret;
186 }
187
188 uint64_t helper_darn64(void)
189 {
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
199
200 return ret;
201 }
202
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 {
205 int i;
206 uint64_t ra = 0;
207
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
213 }
214 }
215 }
216 return ra;
217 }
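/*
 * Worked example: each byte of RS selects one bit of RB, numbered from the
 * most significant bit as in PPC_BIT().  With RS = 0x3f and RB = 0x1, the
 * low byte of RS selects bit 63 (the least significant bit of RB), which is
 * set, so RA = 0x1; the remaining zero bytes of RS select RB's clear bit 0.
 */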
218
219 #endif
220
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 {
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234 }
235
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
239 {
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
250 }
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
254 }
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
258 }
259 return (target_long)ret;
260 }
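/*
 * Worked example: sraw of 0xfffffff5 (-11) by 1 yields -6 and shifts a set
 * bit out of the low end, so CA/CA32 are set; sraw of 0x0000000b (11) by 1
 * yields 5 with CA/CA32 clear, since the carry is only set for negative
 * results that lose set bits.
 */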
261
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
265 {
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
276 }
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
280 }
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
284 }
285 return ret;
286 }
287 #endif
288
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
291 {
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300 }
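/*
 * Worked example: popcntb leaves the population count of each byte in that
 * byte, e.g. popcntb(0x00000000f0123400) = 0x0000000004020300 (0xf0 has
 * four set bits, 0x12 has two and 0x34 has three).
 */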
301
302 target_ulong helper_popcntw(target_ulong val)
303 {
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316 }
317 #else
318 target_ulong helper_popcntb(target_ulong val)
319 {
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325 }
326 #endif
327
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
329 {
330 /*
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we'll handle it in
333  * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
334  * between ctz and cto, we negate the mask at the end of each iteration.
335 */
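/*
 * Worked example: the selected (mask = 1) bits of src are gathered, in
 * order, at the low end of the result and the remaining bits at the high
 * end.  With src = 0xab and mask = 0xf0, the selected bits are 0xa and the
 * low unselected bits are 0xb, giving a result of 0xba.
 */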
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
339
340 if (mask == 0 || mask == -1) {
341 return src;
342 }
343
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
350 }
351
352 /*
353  * Extract the 'n' trailing bits of 'src' and put them in the leading 'n'
354  * bits of 'right' or 'left', pushing down the previously extracted
355  * values.
356 */
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
362 }
363
364 /*
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
369 */
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
375 }
376
377 /*
378  * At the end, 'right' has been rotated right by ctpop(mask) bits in total.
379  * To put it back in place, shift it right by the remaining 64 - ctpop(mask) bits.
380 */
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
385 }
386
387 return left | (right >> n);
388 }
389
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 {
392 int i, o;
393 uint64_t result = 0;
394
395 if (mask == -1) {
396 return src;
397 }
398
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
403 }
404
405 return result;
406 }
407
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 {
410 int i, o;
411 uint64_t result = 0;
412
413 if (mask == -1) {
414 return src;
415 }
416
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
421 }
422
423 return result;
424 }
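/*
 * Worked examples: PDEPD deposits the low-order bits of src into the bit
 * positions selected by the mask, and PEXTD extracts the masked bits of
 * src and packs them at the low end: PDEPD(0x0a, 0xf0) = 0xa0 and
 * PEXTD(0xab, 0xf0) = 0x0a.
 */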
425
426 /*****************************************************************************/
427 /* Altivec extension helpers */
428 #if HOST_BIG_ENDIAN
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
435
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
439 { \
440 to_type r; \
441 \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
450 } \
451 return r; \
452 }
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
455 { \
456 to_type r; \
457 \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
463 } \
464 return r; \
465 }
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
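/*
 * Example: cvtsdsw(0x100000000LL, &sat) clamps to INT32_MAX and sets *sat,
 * while cvtsdsw(-5, &sat) returns -5 and leaves *sat untouched.
 */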
478
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 {
481 ppc_store_vscr(env, vscr);
482 }
483
484 uint32_t helper_mfvscr(CPUPPCState *env)
485 {
486 return ppc_get_vscr(env);
487 }
488
489 static inline void set_vscr_sat(CPUPPCState *env)
490 {
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
493 }
494
495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 {
497 int i;
498
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
501 }
502 }
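/*
 * The carry-out test above uses the identity ~a < b <=> a + b > UINT32_MAX:
 * e.g. a = 0xffffffff, b = 1 gives ~a = 0 < 1, so a carry of 1 is written
 * back, while a = 1, b = 1 gives ~a = 0xfffffffe and no carry.
 */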
503
504 /* vprtybw */
505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 {
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
512 }
513 }
514
515 /* vprtybd */
516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 {
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
524 }
525 }
526
527 /* vprtybq */
528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 {
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
536 }
537
538 #define VARITHFP(suffix, func) \
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
541 { \
542 int i; \
543 \
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
546 } \
547 }
548 VARITHFP(addfp, float32_add)
549 VARITHFP(subfp, float32_sub)
550 VARITHFP(minfp, float32_min)
551 VARITHFP(maxfp, float32_max)
552 #undef VARITHFP
553
554 #define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
557 { \
558 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
562 } \
563 }
564 VARITHFPFMA(maddfp, 0);
565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566 #undef VARITHFPFMA
567
568 #define VARITHSAT_CASE(type, op, cvt, element) \
569 { \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
572 }
573
574 #define VARITHSAT_DO(name, op, optype, cvt, element) \
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 { \
578 int sat = 0; \
579 int i; \
580 \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
582 VARITHSAT_CASE(optype, op, cvt, element); \
583 } \
584 if (sat) { \
585 vscr_sat->u32[0] = 1; \
586 } \
587 }
588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600 #undef VARITHSAT_CASE
601 #undef VARITHSAT_DO
602 #undef VARITHSAT_SIGNED
603 #undef VARITHSAT_UNSIGNED
604
605 #define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 { \
608 int i; \
609 \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
613 } \
614 }
615
616 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620 VAVG(b, s8, int16_t, u8, uint16_t)
621 VAVG(h, s16, int32_t, u16, uint32_t)
622 VAVG(w, s32, int64_t, u32, uint64_t)
623 #undef VAVG_DO
624 #undef VAVG
625
626 #define VABSDU_DO(name, element) \
627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 int i; \
630 \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
635 } \
636 }
637
638 /*
639 * VABSDU - Vector absolute difference unsigned
640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
642 */
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645 VABSDU(b, u8)
646 VABSDU(h, u16)
647 VABSDU(w, u32)
648 #undef VABSDU_DO
649 #undef VABSDU
650
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
654 { \
655 int i; \
656 \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
660 } \
661 }
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
664 #undef VCF
665
666 #define VCMPNEZ(NAME, ELEM) \
667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 { \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
672 } \
673 }
674 VCMPNEZ(VCMPNEZB, u8)
675 VCMPNEZ(VCMPNEZH, u16)
676 VCMPNEZ(VCMPNEZW, u32)
677 #undef VCMPNEZ
678
679 #define VCMPFP_DO(suffix, compare, order, record) \
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
682 { \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
687 \
688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
689 uint32_t result; \
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
699 } \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
703 } \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
706 } \
707 }
708 #define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
711 VCMPFP(eqfp, ==, float_relation_equal)
712 VCMPFP(gefp, !=, float_relation_less)
713 VCMPFP(gtfp, ==, float_relation_greater)
714 #undef VCMPFP_DO
715 #undef VCMPFP
716
717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
719 {
720 int i;
721 int all_in = 0;
722
723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 all_in = 1;
729 } else {
730 float32 bneg = float32_chs(b->f32[i]);
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
735
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
738 }
739 }
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
742 }
743 }
744
745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 0);
748 }
749
750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
751 ppc_avr_t *b)
752 {
753 vcmpbfp_internal(env, r, a, b, 1);
754 }
755
756 #define VCT(suffix, satcvt, element) \
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
759 { \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
763 \
764 set_float_rounding_mode(float_round_to_zero, &s); \
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
767 r->element[i] = 0; \
768 } else { \
769 float64 t = float32_to_float64(b->f32[i], &s); \
770 int64_t j; \
771 \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
775 } \
776 } \
777 if (sat) { \
778 set_vscr_sat(env); \
779 } \
780 }
781 VCT(uxs, cvtsduw, u32)
782 VCT(sxs, cvtsdsw, s32)
783 #undef VCT
784
785 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
786
787 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
788 {
789 int64_t psum = 0;
790 for (int i = 0; i < 8; i++, mask >>= 1) {
791 if (mask & 1) {
792 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
793 }
794 }
795 return psum;
796 }
797
798 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
799 {
800 int64_t psum = 0;
801 for (int i = 0; i < 4; i++, mask >>= 1) {
802 if (mask & 1) {
803 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
804 }
805 }
806 return psum;
807 }
808
809 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
810 {
811 int64_t psum = 0;
812 for (int i = 0; i < 2; i++, mask >>= 1) {
813 if (mask & 1) {
814 psum += (int64_t)sextract32(a, 16 * i, 16) *
815 sextract32(b, 16 * i, 16);
816 }
817 }
818 return psum;
819 }
820
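/*
 * xviger() implements the integer GER (rank-k update) helpers below: for
 * each accumulator entry at[i].word[j] enabled by the XMSK row mask and the
 * YMSK column mask, the rank-k dot product of word i of 'a' and word j of
 * 'b' is computed (individual products gated by PMSK), optionally added to
 * the current accumulator value, and, for the saturating forms, clamped to
 * the int32 range.  Masked-off accumulator entries are cleared.
 */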
821 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
822 uint32_t mask, bool sat, bool acc, do_ger ger)
823 {
824 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
825 xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
826 ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
827 uint8_t xmsk_bit, ymsk_bit;
828 int64_t psum;
829 int i, j;
830 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
831 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
832 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
833 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
834 if (acc) {
835 psum += at[i].VsrSW(j);
836 }
837 if (sat && psum > INT32_MAX) {
838 set_vscr_sat(env);
839 at[i].VsrSW(j) = INT32_MAX;
840 } else if (sat && psum < INT32_MIN) {
841 set_vscr_sat(env);
842 at[i].VsrSW(j) = INT32_MIN;
843 } else {
844 at[i].VsrSW(j) = (int32_t) psum;
845 }
846 } else {
847 at[i].VsrSW(j) = 0;
848 }
849 }
850 }
851 }
852
853 QEMU_FLATTEN
854 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
855 ppc_acc_t *at, uint32_t mask)
856 {
857 xviger(env, a, b, at, mask, false, false, ger_rank8);
858 }
859
860 QEMU_FLATTEN
861 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
862 ppc_acc_t *at, uint32_t mask)
863 {
864 xviger(env, a, b, at, mask, false, true, ger_rank8);
865 }
866
867 QEMU_FLATTEN
868 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
869 ppc_acc_t *at, uint32_t mask)
870 {
871 xviger(env, a, b, at, mask, false, false, ger_rank4);
872 }
873
874 QEMU_FLATTEN
875 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
876 ppc_acc_t *at, uint32_t mask)
877 {
878 xviger(env, a, b, at, mask, false, true, ger_rank4);
879 }
880
881 QEMU_FLATTEN
882 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
883 ppc_acc_t *at, uint32_t mask)
884 {
885 xviger(env, a, b, at, mask, true, true, ger_rank4);
886 }
887
888 QEMU_FLATTEN
889 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
890 ppc_acc_t *at, uint32_t mask)
891 {
892 xviger(env, a, b, at, mask, false, false, ger_rank2);
893 }
894
895 QEMU_FLATTEN
896 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
897 ppc_acc_t *at, uint32_t mask)
898 {
899 xviger(env, a, b, at, mask, true, false, ger_rank2);
900 }
901
902 QEMU_FLATTEN
903 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
904 ppc_acc_t *at, uint32_t mask)
905 {
906 xviger(env, a, b, at, mask, false, true, ger_rank2);
907 }
908
909 QEMU_FLATTEN
910 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
911 ppc_acc_t *at, uint32_t mask)
912 {
913 xviger(env, a, b, at, mask, true, true, ger_rank2);
914 }
915
916 target_ulong helper_vclzlsbb(ppc_avr_t *r)
917 {
918 target_ulong count = 0;
919 int i;
920 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
921 if (r->VsrB(i) & 0x01) {
922 break;
923 }
924 count++;
925 }
926 return count;
927 }
928
929 target_ulong helper_vctzlsbb(ppc_avr_t *r)
930 {
931 target_ulong count = 0;
932 int i;
933 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
934 if (r->VsrB(i) & 0x01) {
935 break;
936 }
937 count++;
938 }
939 return count;
940 }
941
942 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
943 ppc_avr_t *b, ppc_avr_t *c)
944 {
945 int sat = 0;
946 int i;
947
948 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
949 int32_t prod = a->s16[i] * b->s16[i];
950 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
951
952 r->s16[i] = cvtswsh(t, &sat);
953 }
954
955 if (sat) {
956 set_vscr_sat(env);
957 }
958 }
959
960 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
961 ppc_avr_t *b, ppc_avr_t *c)
962 {
963 int sat = 0;
964 int i;
965
966 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
967 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
968 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
969 r->s16[i] = cvtswsh(t, &sat);
970 }
971
972 if (sat) {
973 set_vscr_sat(env);
974 }
975 }
976
977 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
978 {
979 int i;
980
981 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
982 int32_t prod = a->s16[i] * b->s16[i];
983 r->s16[i] = (int16_t) (prod + c->s16[i]);
984 }
985 }
986
987 #define VMRG_DO(name, element, access, ofs) \
988 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
989 { \
990 ppc_avr_t result; \
991 int i, half = ARRAY_SIZE(r->element) / 2; \
992 \
993 for (i = 0; i < half; i++) { \
994 result.access(i * 2 + 0) = a->access(i + ofs); \
995 result.access(i * 2 + 1) = b->access(i + ofs); \
996 } \
997 *r = result; \
998 }
999
1000 #define VMRG(suffix, element, access) \
1001 VMRG_DO(mrgl##suffix, element, access, half) \
1002 VMRG_DO(mrgh##suffix, element, access, 0)
1003 VMRG(b, u8, VsrB)
1004 VMRG(h, u16, VsrH)
1005 VMRG(w, u32, VsrW)
1006 #undef VMRG_DO
1007 #undef VMRG
1008
1009 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1010 {
1011 int32_t prod[16];
1012 int i;
1013
1014 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1015 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1016 }
1017
1018 VECTOR_FOR_INORDER_I(i, s32) {
1019 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1020 prod[4 * i + 2] + prod[4 * i + 3];
1021 }
1022 }
1023
1024 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1025 {
1026 int32_t prod[8];
1027 int i;
1028
1029 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1030 prod[i] = a->s16[i] * b->s16[i];
1031 }
1032
1033 VECTOR_FOR_INORDER_I(i, s32) {
1034 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1035 }
1036 }
1037
1038 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1039 ppc_avr_t *b, ppc_avr_t *c)
1040 {
1041 int32_t prod[8];
1042 int i;
1043 int sat = 0;
1044
1045 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1046 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1047 }
1048
1049 VECTOR_FOR_INORDER_I(i, s32) {
1050 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1051
1052 r->u32[i] = cvtsdsw(t, &sat);
1053 }
1054
1055 if (sat) {
1056 set_vscr_sat(env);
1057 }
1058 }
1059
1060 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1061 {
1062 uint16_t prod[16];
1063 int i;
1064
1065 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1066 prod[i] = a->u8[i] * b->u8[i];
1067 }
1068
1069 VECTOR_FOR_INORDER_I(i, u32) {
1070 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1071 prod[4 * i + 2] + prod[4 * i + 3];
1072 }
1073 }
1074
1075 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1076 {
1077 uint32_t prod[8];
1078 int i;
1079
1080 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1081 prod[i] = a->u16[i] * b->u16[i];
1082 }
1083
1084 VECTOR_FOR_INORDER_I(i, u32) {
1085 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1086 }
1087 }
1088
1089 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1090 ppc_avr_t *b, ppc_avr_t *c)
1091 {
1092 uint32_t prod[8];
1093 int i;
1094 int sat = 0;
1095
1096 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1097 prod[i] = a->u16[i] * b->u16[i];
1098 }
1099
1100 VECTOR_FOR_INORDER_I(i, s32) {
1101 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1102
1103 r->u32[i] = cvtuduw(t, &sat);
1104 }
1105
1106 if (sat) {
1107 set_vscr_sat(env);
1108 }
1109 }
1110
1111 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1112 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1113 { \
1114 int i; \
1115 \
1116 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1117 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1118 (cast)b->mul_access(i); \
1119 } \
1120 }
1121
1122 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1123 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1124 { \
1125 int i; \
1126 \
1127 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1128 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1129 (cast)b->mul_access(i + 1); \
1130 } \
1131 }
1132
1133 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1134 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1135 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1136 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1137 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1138 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1139 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1140 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1141 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1142 #undef VMUL_DO_EVN
1143 #undef VMUL_DO_ODD
1144 #undef VMUL
1145
1146 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1147 target_ulong uim)
1148 {
1149 int i, idx;
1150 ppc_vsr_t tmp = { .u64 = {0, 0} };
1151
1152 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1153 if ((pcv->VsrB(i) >> 5) == uim) {
1154 idx = pcv->VsrB(i) & 0x1f;
1155 if (idx < ARRAY_SIZE(t->u8)) {
1156 tmp.VsrB(i) = s0->VsrB(idx);
1157 } else {
1158 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1159 }
1160 }
1161 }
1162
1163 *t = tmp;
1164 }
1165
1166 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1167 {
1168 Int128 neg1 = int128_makes64(-1);
1169 Int128 int128_min = int128_make128(0, INT64_MIN);
1170 if (likely(int128_nz(b->s128) &&
1171 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1172 t->s128 = int128_divs(a->s128, b->s128);
1173 } else {
1174 t->s128 = a->s128; /* Undefined behavior */
1175 }
1176 }
1177
1178 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1179 {
1180 if (int128_nz(b->s128)) {
1181 t->s128 = int128_divu(a->s128, b->s128);
1182 } else {
1183 t->s128 = a->s128; /* Undefined behavior */
1184 }
1185 }
1186
1187 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1188 {
1189 int i;
1190 int64_t high;
1191 uint64_t low;
1192 for (i = 0; i < 2; i++) {
1193 high = a->s64[i];
1194 low = 0;
1195 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1196 t->s64[i] = a->s64[i]; /* Undefined behavior */
1197 } else {
1198 divs128(&low, &high, b->s64[i]);
1199 t->s64[i] = low;
1200 }
1201 }
1202 }
1203
1204 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1205 {
1206 int i;
1207 uint64_t high, low;
1208 for (i = 0; i < 2; i++) {
1209 high = a->u64[i];
1210 low = 0;
1211 if (unlikely(!b->u64[i])) {
1212 t->u64[i] = a->u64[i]; /* Undefined behavior */
1213 } else {
1214 divu128(&low, &high, b->u64[i]);
1215 t->u64[i] = low;
1216 }
1217 }
1218 }
1219
1220 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1221 {
1222 Int128 high, low;
1223 Int128 int128_min = int128_make128(0, INT64_MIN);
1224 Int128 neg1 = int128_makes64(-1);
1225
1226 high = a->s128;
1227 low = int128_zero();
1228 if (unlikely(!int128_nz(b->s128) ||
1229 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1230 t->s128 = a->s128; /* Undefined behavior */
1231 } else {
1232 divs256(&low, &high, b->s128);
1233 t->s128 = low;
1234 }
1235 }
1236
1237 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1238 {
1239 Int128 high, low;
1240
1241 high = a->s128;
1242 low = int128_zero();
1243 if (unlikely(!int128_nz(b->s128))) {
1244 t->s128 = a->s128; /* Undefined behavior */
1245 } else {
1246 divu256(&low, &high, b->s128);
1247 t->s128 = low;
1248 }
1249 }
1250
1251 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1252 {
1253 Int128 neg1 = int128_makes64(-1);
1254 Int128 int128_min = int128_make128(0, INT64_MIN);
1255 if (likely(int128_nz(b->s128) &&
1256 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1257 t->s128 = int128_rems(a->s128, b->s128);
1258 } else {
1259 t->s128 = int128_zero(); /* Undefined behavior */
1260 }
1261 }
1262
1263 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1264 {
1265 if (likely(int128_nz(b->s128))) {
1266 t->s128 = int128_remu(a->s128, b->s128);
1267 } else {
1268 t->s128 = int128_zero(); /* Undefined behavior */
1269 }
1270 }
1271
1272 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1273 {
1274 ppc_avr_t result;
1275 int i;
1276
1277 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1278 int s = c->VsrB(i) & 0x1f;
1279 int index = s & 0xf;
1280
1281 if (s & 0x10) {
1282 result.VsrB(i) = b->VsrB(index);
1283 } else {
1284 result.VsrB(i) = a->VsrB(index);
1285 }
1286 }
1287 *r = result;
1288 }
1289
1290 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1291 {
1292 ppc_avr_t result;
1293 int i;
1294
1295 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1296 int s = c->VsrB(i) & 0x1f;
1297 int index = 15 - (s & 0xf);
1298
1299 if (s & 0x10) {
1300 result.VsrB(i) = a->VsrB(index);
1301 } else {
1302 result.VsrB(i) = b->VsrB(index);
1303 }
1304 }
1305 *r = result;
1306 }
1307
1308 #define XXGENPCV_BE_EXP(NAME, SZ) \
1309 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1310 { \
1311 ppc_vsr_t tmp; \
1312 \
1313 /* Initialize tmp with the result of an all-zeros mask */ \
1314 tmp.VsrD(0) = 0x1011121314151617; \
1315 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1316 \
1317 /* Iterate over the most significant byte of each element */ \
1318 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1319 if (b->VsrB(i) & 0x80) { \
1320 /* Update each byte of the element */ \
1321 for (int k = 0; k < SZ; k++) { \
1322 tmp.VsrB(i + k) = j + k; \
1323 } \
1324 j += SZ; \
1325 } \
1326 } \
1327 \
1328 *t = tmp; \
1329 }
1330
1331 #define XXGENPCV_BE_COMP(NAME, SZ) \
1332 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1333 { \
1334 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1335 \
1336 /* Iterate over the most significant byte of each element */ \
1337 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1338 if (b->VsrB(i) & 0x80) { \
1339 /* Update each byte of the element */ \
1340 for (int k = 0; k < SZ; k++) { \
1341 tmp.VsrB(j + k) = i + k; \
1342 } \
1343 j += SZ; \
1344 } \
1345 } \
1346 \
1347 *t = tmp; \
1348 }
1349
1350 #define XXGENPCV_LE_EXP(NAME, SZ) \
1351 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1352 { \
1353 ppc_vsr_t tmp; \
1354 \
1355 /* Initialize tmp with the result of an all-zeros mask */ \
1356 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1357 tmp.VsrD(1) = 0x1716151413121110; \
1358 \
1359 /* Iterate over the most significant byte of each element */ \
1360 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1361 /* Reverse indexing of "i" */ \
1362 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1363 if (b->VsrB(idx) & 0x80) { \
1364 /* Update each byte of the element */ \
1365 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1366 tmp.VsrB(idx + rk) = j + k; \
1367 } \
1368 j += SZ; \
1369 } \
1370 } \
1371 \
1372 *t = tmp; \
1373 }
1374
1375 #define XXGENPCV_LE_COMP(NAME, SZ) \
1376 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1377 { \
1378 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1379 \
1380 /* Iterate over the most significant byte of each element */ \
1381 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1382 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1383 /* Update each byte of the element */ \
1384 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1385 /* Reverse indexing of "j" */ \
1386 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1387 tmp.VsrB(idx + rk) = i + k; \
1388 } \
1389 j += SZ; \
1390 } \
1391 } \
1392 \
1393 *t = tmp; \
1394 }
1395
1396 #define XXGENPCV(NAME, SZ) \
1397 XXGENPCV_BE_EXP(NAME, SZ) \
1398 XXGENPCV_BE_COMP(NAME, SZ) \
1399 XXGENPCV_LE_EXP(NAME, SZ) \
1400 XXGENPCV_LE_COMP(NAME, SZ) \
1401
1402 XXGENPCV(XXGENPCVBM, 1)
1403 XXGENPCV(XXGENPCVHM, 2)
1404 XXGENPCV(XXGENPCVWM, 4)
1405 XXGENPCV(XXGENPCVDM, 8)
1406
1407 #undef XXGENPCV_BE_EXP
1408 #undef XXGENPCV_BE_COMP
1409 #undef XXGENPCV_LE_EXP
1410 #undef XXGENPCV_LE_COMP
1411 #undef XXGENPCV
1412
1413 #if HOST_BIG_ENDIAN
1414 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1415 #define VBPERMD_INDEX(i) (i)
1416 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1417 #else
1418 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1419 #define VBPERMD_INDEX(i) (1 - i)
1420 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1421 #endif
1422 #define EXTRACT_BIT(avr, i, index) \
1423 (extract64((avr)->VsrD(i), 63 - index, 1))
1424
1425 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1426 {
1427 int i, j;
1428 ppc_avr_t result = { .u64 = { 0, 0 } };
1429 VECTOR_FOR_INORDER_I(i, u64) {
1430 for (j = 0; j < 8; j++) {
1431 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1432 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1433 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1434 }
1435 }
1436 }
1437 *r = result;
1438 }
1439
1440 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1441 {
1442 int i;
1443 uint64_t perm = 0;
1444
1445 VECTOR_FOR_INORDER_I(i, u8) {
1446 int index = VBPERMQ_INDEX(b, i);
1447
1448 if (index < 128) {
1449 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1450 if (a->u64[VBPERMQ_DW(index)] & mask) {
1451 perm |= (0x8000 >> i);
1452 }
1453 }
1454 }
1455
1456 r->VsrD(0) = perm;
1457 r->VsrD(1) = 0;
1458 }
1459
1460 #undef VBPERMQ_INDEX
1461 #undef VBPERMQ_DW
1462
1463 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1464 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1465 { \
1466 int i, j; \
1467 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1468 \
1469 VECTOR_FOR_INORDER_I(i, srcfld) { \
1470 prod[i] = 0; \
1471 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1472 if (a->srcfld[i] & (1ull << j)) { \
1473 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1474 } \
1475 } \
1476 } \
1477 \
1478 VECTOR_FOR_INORDER_I(i, trgfld) { \
1479 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1480 } \
1481 }
1482
1483 PMSUM(vpmsumb, u8, u16, uint16_t)
1484 PMSUM(vpmsumh, u16, u32, uint32_t)
1485 PMSUM(vpmsumw, u32, u64, uint64_t)
1486
1487 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1488 {
1489
1490 #ifdef CONFIG_INT128
1491 int i, j;
1492 __uint128_t prod[2];
1493
1494 VECTOR_FOR_INORDER_I(i, u64) {
1495 prod[i] = 0;
1496 for (j = 0; j < 64; j++) {
1497 if (a->u64[i] & (1ull << j)) {
1498 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1499 }
1500 }
1501 }
1502
1503 r->u128 = prod[0] ^ prod[1];
1504
1505 #else
1506 int i, j;
1507 ppc_avr_t prod[2];
1508
1509 VECTOR_FOR_INORDER_I(i, u64) {
1510 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1511 for (j = 0; j < 64; j++) {
1512 if (a->u64[i] & (1ull << j)) {
1513 ppc_avr_t bshift;
1514 if (j == 0) {
1515 bshift.VsrD(0) = 0;
1516 bshift.VsrD(1) = b->u64[i];
1517 } else {
1518 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1519 bshift.VsrD(1) = b->u64[i] << j;
1520 }
1521 prod[i].VsrD(1) ^= bshift.VsrD(1);
1522 prod[i].VsrD(0) ^= bshift.VsrD(0);
1523 }
1524 }
1525 }
1526
1527 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1528 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1529 #endif
1530 }
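/*
 * The vpmsum* helpers above implement carry-less (polynomial, GF(2))
 * multiplication: partial products are combined with XOR rather than
 * addition, e.g. 0b11 multiplied carry-less by 0b11 gives 0b101 because the
 * two middle terms cancel.  vpmsumd XORs the two 64x64 -> 128-bit products
 * of the corresponding doublewords.
 */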
1531
1532
1533 #if HOST_BIG_ENDIAN
1534 #define PKBIG 1
1535 #else
1536 #define PKBIG 0
1537 #endif
1538 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1539 {
1540 int i, j;
1541 ppc_avr_t result;
1542 #if HOST_BIG_ENDIAN
1543 const ppc_avr_t *x[2] = { a, b };
1544 #else
1545 const ppc_avr_t *x[2] = { b, a };
1546 #endif
1547
1548 VECTOR_FOR_INORDER_I(i, u64) {
1549 VECTOR_FOR_INORDER_I(j, u32) {
1550 uint32_t e = x[i]->u32[j];
1551
1552 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1553 ((e >> 6) & 0x3e0) |
1554 ((e >> 3) & 0x1f));
1555 }
1556 }
1557 *r = result;
1558 }
1559
1560 #define VPK(suffix, from, to, cvt, dosat) \
1561 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1562 ppc_avr_t *a, ppc_avr_t *b) \
1563 { \
1564 int i; \
1565 int sat = 0; \
1566 ppc_avr_t result; \
1567 ppc_avr_t *a0 = PKBIG ? a : b; \
1568 ppc_avr_t *a1 = PKBIG ? b : a; \
1569 \
1570 VECTOR_FOR_INORDER_I(i, from) { \
1571 result.to[i] = cvt(a0->from[i], &sat); \
1572 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1573 } \
1574 *r = result; \
1575 if (dosat && sat) { \
1576 set_vscr_sat(env); \
1577 } \
1578 }
1579 #define I(x, y) (x)
1580 VPK(shss, s16, s8, cvtshsb, 1)
1581 VPK(shus, s16, u8, cvtshub, 1)
1582 VPK(swss, s32, s16, cvtswsh, 1)
1583 VPK(swus, s32, u16, cvtswuh, 1)
1584 VPK(sdss, s64, s32, cvtsdsw, 1)
1585 VPK(sdus, s64, u32, cvtsduw, 1)
1586 VPK(uhus, u16, u8, cvtuhub, 1)
1587 VPK(uwus, u32, u16, cvtuwuh, 1)
1588 VPK(udus, u64, u32, cvtuduw, 1)
1589 VPK(uhum, u16, u8, I, 0)
1590 VPK(uwum, u32, u16, I, 0)
1591 VPK(udum, u64, u32, I, 0)
1592 #undef I
1593 #undef VPK
1594 #undef PKBIG
1595
1596 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1597 {
1598 int i;
1599
1600 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1601 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1602 }
1603 }
1604
1605 #define VRFI(suffix, rounding) \
1606 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1607 ppc_avr_t *b) \
1608 { \
1609 int i; \
1610 float_status s = env->vec_status; \
1611 \
1612 set_float_rounding_mode(rounding, &s); \
1613 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1614 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1615 } \
1616 }
1617 VRFI(n, float_round_nearest_even)
1618 VRFI(m, float_round_down)
1619 VRFI(p, float_round_up)
1620 VRFI(z, float_round_to_zero)
1621 #undef VRFI
1622
1623 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1624 {
1625 int i;
1626
1627 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1628 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1629
1630 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1631 }
1632 }
1633
1634 #define VRLMI(name, size, element, insert) \
1635 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1636 { \
1637 int i; \
1638 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1639 uint##size##_t src1 = a->element[i]; \
1640 uint##size##_t src2 = b->element[i]; \
1641 uint##size##_t src3 = r->element[i]; \
1642 uint##size##_t begin, end, shift, mask, rot_val; \
1643 \
1644 shift = extract##size(src2, 0, 6); \
1645 end = extract##size(src2, 8, 6); \
1646 begin = extract##size(src2, 16, 6); \
1647 rot_val = rol##size(src1, shift); \
1648 mask = mask_u##size(begin, end); \
1649 if (insert) { \
1650 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1651 } else { \
1652 r->element[i] = (rot_val & mask); \
1653 } \
1654 } \
1655 }
1656
1657 VRLMI(VRLDMI, 64, u64, 1);
1658 VRLMI(VRLWMI, 32, u32, 1);
1659 VRLMI(VRLDNM, 64, u64, 0);
1660 VRLMI(VRLWNM, 32, u32, 0);
1661
1662 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1663 {
1664 int i;
1665
1666 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1667 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1668 }
1669 }
1670
1671 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1672 {
1673 int i;
1674
1675 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1676 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1677 }
1678 }
1679
1680 #define VEXTU_X_DO(name, size, left) \
1681 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1682 { \
1683 int index = (a & 0xf) * 8; \
1684 if (left) { \
1685 index = 128 - index - size; \
1686 } \
1687 return int128_getlo(int128_rshift(b->s128, index)) & \
1688 MAKE_64BIT_MASK(0, size); \
1689 }
1690 VEXTU_X_DO(vextublx, 8, 1)
1691 VEXTU_X_DO(vextuhlx, 16, 1)
1692 VEXTU_X_DO(vextuwlx, 32, 1)
1693 VEXTU_X_DO(vextubrx, 8, 0)
1694 VEXTU_X_DO(vextuhrx, 16, 0)
1695 VEXTU_X_DO(vextuwrx, 32, 0)
1696 #undef VEXTU_X_DO
1697
1698 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1699 {
1700 int i;
1701 unsigned int shift, bytes, size;
1702
1703 size = ARRAY_SIZE(r->u8);
1704 for (i = 0; i < size; i++) {
1705 shift = b->VsrB(i) & 0x7; /* extract shift value */
1706 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1707 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1708 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1709 }
1710 }
1711
1712 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1713 {
1714 int i;
1715 unsigned int shift, bytes;
1716
1717 /*
1718  * Process in reverse order, as the destination and source registers can
1719  * be the same.  The vector is modified in place to save a temporary, so
1720  * reverse order guarantees that already-computed bytes are not fed back.
1721 */
1722 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1723 shift = b->VsrB(i) & 0x7; /* extract shift value */
1724 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1725 /* extract adjacent bytes */
1726 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1727 }
1728 }
1729
1730 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1731 {
1732 int sh = shift & 0xf;
1733 int i;
1734 ppc_avr_t result;
1735
1736 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1737 int index = sh + i;
1738 if (index > 0xf) {
1739 result.VsrB(i) = b->VsrB(index - 0x10);
1740 } else {
1741 result.VsrB(i) = a->VsrB(index);
1742 }
1743 }
1744 *r = result;
1745 }
1746
1747 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1748 {
1749 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1750
1751 #if HOST_BIG_ENDIAN
1752 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1753 memset(&r->u8[16 - sh], 0, sh);
1754 #else
1755 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1756 memset(&r->u8[0], 0, sh);
1757 #endif
1758 }
1759
1760 #if HOST_BIG_ENDIAN
1761 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1762 #else
1763 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1764 #endif
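/*
 * ELEM_ADDR() returns the lowest host address of the SIZE bytes making up
 * the element that starts at vector byte IDX (PowerISA byte numbering,
 * byte 0 being the most significant), so the memcpy() in VINSX below stores
 * a host-native integer with the byte order the register expects on both
 * big- and little-endian hosts.
 */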
1765
1766 #define VINSX(SUFFIX, TYPE) \
1767 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1768 uint64_t val, target_ulong index) \
1769 { \
1770 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1771 target_long idx = index; \
1772 \
1773 if (idx < 0 || idx > maxidx) { \
1774 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1775 qemu_log_mask(LOG_GUEST_ERROR, \
1776 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1777 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1778 } else { \
1779 TYPE src = val; \
1780 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1781 } \
1782 }
1783 VINSX(B, uint8_t)
1784 VINSX(H, uint16_t)
1785 VINSX(W, uint32_t)
1786 VINSX(D, uint64_t)
1787 #undef ELEM_ADDR
1788 #undef VINSX
1789 #if HOST_BIG_ENDIAN
1790 #define VEXTDVLX(NAME, SIZE) \
1791 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1792 target_ulong index) \
1793 { \
1794 const target_long idx = index; \
1795 ppc_avr_t tmp[2] = { *a, *b }; \
1796 memset(t, 0, sizeof(*t)); \
1797 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1798 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1799 } else { \
1800 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1801 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1802 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1803 } \
1804 }
1805 #else
1806 #define VEXTDVLX(NAME, SIZE) \
1807 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1808 target_ulong index) \
1809 { \
1810 const target_long idx = index; \
1811 ppc_avr_t tmp[2] = { *b, *a }; \
1812 memset(t, 0, sizeof(*t)); \
1813 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1814 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1815 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1816 } else { \
1817 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1818 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1819 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1820 } \
1821 }
1822 #endif
1823 VEXTDVLX(VEXTDUBVLX, 1)
1824 VEXTDVLX(VEXTDUHVLX, 2)
1825 VEXTDVLX(VEXTDUWVLX, 4)
1826 VEXTDVLX(VEXTDDVLX, 8)
1827 #undef VEXTDVLX
1828 #if HOST_BIG_ENDIAN
1829 #define VEXTRACT(suffix, element) \
1830 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1831 { \
1832 uint32_t es = sizeof(r->element[0]); \
1833 memmove(&r->u8[8 - es], &b->u8[index], es); \
1834 memset(&r->u8[8], 0, 8); \
1835 memset(&r->u8[0], 0, 8 - es); \
1836 }
1837 #else
1838 #define VEXTRACT(suffix, element) \
1839 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1840 { \
1841 uint32_t es = sizeof(r->element[0]); \
1842 uint32_t s = (16 - index) - es; \
1843 memmove(&r->u8[8], &b->u8[s], es); \
1844 memset(&r->u8[0], 0, 8); \
1845 memset(&r->u8[8 + es], 0, 8 - es); \
1846 }
1847 #endif
1848 VEXTRACT(ub, u8)
1849 VEXTRACT(uh, u16)
1850 VEXTRACT(uw, u32)
1851 VEXTRACT(d, u64)
1852 #undef VEXTRACT
1853
1854 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1855 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1856 { \
1857 int i, idx, crf = 0; \
1858 \
1859 for (i = 0; i < NUM_ELEMS; i++) { \
1860 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1861 if (b->Vsr##ELEM(idx)) { \
1862 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1863 } else { \
1864 crf = 0b0010; \
1865 break; \
1866 } \
1867 } \
1868 \
1869 for (; i < NUM_ELEMS; i++) { \
1870 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1871 t->Vsr##ELEM(idx) = 0; \
1872 } \
1873 \
1874 return crf; \
1875 }
1876 VSTRI(VSTRIBL, B, 16, true)
1877 VSTRI(VSTRIBR, B, 16, false)
1878 VSTRI(VSTRIHL, H, 8, true)
1879 VSTRI(VSTRIHR, H, 8, false)
1880 #undef VSTRI
1881
1882 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1883 {
1884 ppc_vsr_t t = { };
1885 size_t es = sizeof(uint32_t);
1886 uint32_t ext_index;
1887 int i;
1888
1889 ext_index = index;
1890 for (i = 0; i < es; i++, ext_index++) {
1891 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1892 }
1893
1894 *xt = t;
1895 }
1896
1897 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1898 {
1899 ppc_vsr_t t = *xt;
1900 size_t es = sizeof(uint32_t);
1901 int ins_index, i = 0;
1902
1903 ins_index = index;
1904 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1905 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1906 }
1907
1908 *xt = t;
1909 }
1910
1911 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1912 uint32_t desc)
1913 {
1914 /*
1915 * Instead of processing imm bit-by-bit, we'll skip the computation of
1916 * conjunctions whose corresponding bit is unset.
1917 */
1918 int bit, imm = simd_data(desc);
1919 Int128 conj, disj = int128_zero();
1920
1921 /* Iterate over set bits from the least to the most significant bit */
1922 while (imm) {
1923 /*
1924  * Get the next set bit to be processed with ctzl, and subtract the result
1925  * from 7 to match the big-endian bit indexing used by PowerISA.
1926 */
1927 bit = 7 - ctzl(imm);
1928 if (bit & 0x4) {
1929 conj = a->s128;
1930 } else {
1931 conj = int128_not(a->s128);
1932 }
1933 if (bit & 0x2) {
1934 conj = int128_and(conj, b->s128);
1935 } else {
1936 conj = int128_and(conj, int128_not(b->s128));
1937 }
1938 if (bit & 0x1) {
1939 conj = int128_and(conj, c->s128);
1940 } else {
1941 conj = int128_and(conj, int128_not(c->s128));
1942 }
1943 disj = int128_or(disj, conj);
1944
1945 /* Unset the least significant bit that is set */
1946 imm &= imm - 1;
1947 }
1948
1949 t->s128 = disj;
1950 }
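/*
 * The immediate is an 8-entry truth table indexed by (a, b, c), with index
 * 0 held in the most significant bit.  For example, imm = 0x01 computes
 * a & b & c, imm = 0x80 computes ~a & ~b & ~c, and imm = 0xff sets every
 * result bit.
 */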
1951
1952 #define XXBLEND(name, sz) \
1953 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1954 ppc_avr_t *c, uint32_t desc) \
1955 { \
1956 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1957 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1958 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1959 } \
1960 }
1961 XXBLEND(B, 8)
1962 XXBLEND(H, 16)
1963 XXBLEND(W, 32)
1964 XXBLEND(D, 64)
1965 #undef XXBLEND
1966
1967 #define VNEG(name, element) \
1968 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1969 { \
1970 int i; \
1971 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1972 r->element[i] = -b->element[i]; \
1973 } \
1974 }
1975 VNEG(vnegw, s32)
1976 VNEG(vnegd, s64)
1977 #undef VNEG
1978
1979 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1980 {
1981 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1982
1983 #if HOST_BIG_ENDIAN
1984 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1985 memset(&r->u8[0], 0, sh);
1986 #else
1987 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1988 memset(&r->u8[16 - sh], 0, sh);
1989 #endif
1990 }
1991
1992 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1993 {
1994 int i;
1995
1996 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1997 r->u32[i] = a->u32[i] >= b->u32[i];
1998 }
1999 }
2000
2001 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2002 {
2003 int64_t t;
2004 int i, upper;
2005 ppc_avr_t result;
2006 int sat = 0;
2007
2008 upper = ARRAY_SIZE(r->s32) - 1;
2009 t = (int64_t)b->VsrSW(upper);
2010 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2011 t += a->VsrSW(i);
2012 result.VsrSW(i) = 0;
2013 }
2014 result.VsrSW(upper) = cvtsdsw(t, &sat);
2015 *r = result;
2016
2017 if (sat) {
2018 set_vscr_sat(env);
2019 }
2020 }
2021
2022 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2023 {
2024 int i, j, upper;
2025 ppc_avr_t result;
2026 int sat = 0;
2027
2028 upper = 1;
2029 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2030 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
2031
2032 result.VsrD(i) = 0;
2033 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2034 t += a->VsrSW(2 * i + j);
2035 }
2036 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2037 }
2038
2039 *r = result;
2040 if (sat) {
2041 set_vscr_sat(env);
2042 }
2043 }
2044
2045 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2046 {
2047 int i, j;
2048 int sat = 0;
2049
2050 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2051 int64_t t = (int64_t)b->s32[i];
2052
2053 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2054 t += a->s8[4 * i + j];
2055 }
2056 r->s32[i] = cvtsdsw(t, &sat);
2057 }
2058
2059 if (sat) {
2060 set_vscr_sat(env);
2061 }
2062 }
2063
2064 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2065 {
2066 int sat = 0;
2067 int i;
2068
2069 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2070 int64_t t = (int64_t)b->s32[i];
2071
2072 t += a->s16[2 * i] + a->s16[2 * i + 1];
2073 r->s32[i] = cvtsdsw(t, &sat);
2074 }
2075
2076 if (sat) {
2077 set_vscr_sat(env);
2078 }
2079 }
2080
2081 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2082 {
2083 int i, j;
2084 int sat = 0;
2085
2086 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2087 uint64_t t = (uint64_t)b->u32[i];
2088
2089 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2090 t += a->u8[4 * i + j];
2091 }
2092 r->u32[i] = cvtuduw(t, &sat);
2093 }
2094
2095 if (sat) {
2096 set_vscr_sat(env);
2097 }
2098 }
2099
2100 #if HOST_BIG_ENDIAN
2101 #define UPKHI 1
2102 #define UPKLO 0
2103 #else
2104 #define UPKHI 0
2105 #define UPKLO 1
2106 #endif
2107 #define VUPKPX(suffix, hi) \
2108 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2109 { \
2110 int i; \
2111 ppc_avr_t result; \
2112 \
2113 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2114 uint16_t e = b->u16[hi ? i : i + 4]; \
2115 uint8_t a = (e >> 15) ? 0xff : 0; \
2116 uint8_t r = (e >> 10) & 0x1f; \
2117 uint8_t g = (e >> 5) & 0x1f; \
2118 uint8_t b = e & 0x1f; \
2119 \
2120 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2121 } \
2122 *r = result; \
2123 }
2124 VUPKPX(lpx, UPKLO)
2125 VUPKPX(hpx, UPKHI)
2126 #undef VUPKPX
2127
2128 #define VUPK(suffix, unpacked, packee, hi) \
2129 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2130 { \
2131 int i; \
2132 ppc_avr_t result; \
2133 \
2134 if (hi) { \
2135 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2136 result.unpacked[i] = b->packee[i]; \
2137 } \
2138 } else { \
2139 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2140 i++) { \
2141 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2142 } \
2143 } \
2144 *r = result; \
2145 }
2146 VUPK(hsb, s16, s8, UPKHI)
2147 VUPK(hsh, s32, s16, UPKHI)
2148 VUPK(hsw, s64, s32, UPKHI)
2149 VUPK(lsb, s16, s8, UPKLO)
2150 VUPK(lsh, s32, s16, UPKLO)
2151 VUPK(lsw, s64, s32, UPKLO)
2152 #undef VUPK
2153 #undef UPKHI
2154 #undef UPKLO
2155
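/* Expand a helper applying the unary operation "name" to every "element" lane. */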
2156 #define VGENERIC_DO(name, element) \
2157 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2158 { \
2159 int i; \
2160 \
2161 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2162 r->element[i] = name(b->element[i]); \
2163 } \
2164 }
2165
2166 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2167 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2168
2169 VGENERIC_DO(clzb, u8)
2170 VGENERIC_DO(clzh, u16)
2171
2172 #undef clzb
2173 #undef clzh
2174
2175 #define ctzb(v) ((v) ? ctz32(v) : 8)
2176 #define ctzh(v) ((v) ? ctz32(v) : 16)
2177 #define ctzw(v) ctz32((v))
2178 #define ctzd(v) ctz64((v))
2179
2180 VGENERIC_DO(ctzb, u8)
2181 VGENERIC_DO(ctzh, u16)
2182 VGENERIC_DO(ctzw, u32)
2183 VGENERIC_DO(ctzd, u64)
2184
2185 #undef ctzb
2186 #undef ctzh
2187 #undef ctzw
2188 #undef ctzd
2189
2190 #define popcntb(v) ctpop8(v)
2191 #define popcnth(v) ctpop16(v)
2192 #define popcntw(v) ctpop32(v)
2193 #define popcntd(v) ctpop64(v)
2194
2195 VGENERIC_DO(popcntb, u8)
2196 VGENERIC_DO(popcnth, u16)
2197 VGENERIC_DO(popcntw, u32)
2198 VGENERIC_DO(popcntd, u64)
2199
2200 #undef popcntb
2201 #undef popcnth
2202 #undef popcntw
2203 #undef popcntd
2204
2205 #undef VGENERIC_DO
2206
2207 #if HOST_BIG_ENDIAN
2208 #define QW_ONE { .u64 = { 0, 1 } }
2209 #else
2210 #define QW_ONE { .u64 = { 1, 0 } }
2211 #endif
2212
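/*
 * Without compiler __int128 support (CONFIG_INT128 unset), quadword
 * arithmetic is done on the two 64-bit doublewords with explicit carry
 * propagation.
 */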
2213 #ifndef CONFIG_INT128
2214
2215 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2216 {
2217 t->u64[0] = ~a.u64[0];
2218 t->u64[1] = ~a.u64[1];
2219 }
2220
2221 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2222 {
2223 if (a.VsrD(0) < b.VsrD(0)) {
2224 return -1;
2225 } else if (a.VsrD(0) > b.VsrD(0)) {
2226 return 1;
2227 } else if (a.VsrD(1) < b.VsrD(1)) {
2228 return -1;
2229 } else if (a.VsrD(1) > b.VsrD(1)) {
2230 return 1;
2231 } else {
2232 return 0;
2233 }
2234 }
2235
2236 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2237 {
2238 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2239 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2240 (~a.VsrD(1) < b.VsrD(1));
2241 }
2242
2243 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2244 {
2245 ppc_avr_t not_a;
2246 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2247 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2248 (~a.VsrD(1) < b.VsrD(1));
2249 avr_qw_not(&not_a, a);
2250 return avr_qw_cmpu(not_a, b) < 0;
2251 }
2252
2253 #endif
2254
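/*
 * Quadword (128-bit) integer add/subtract family: vadduqm/vsubuqm perform
 * the modulo-2^128 operation, the *euqm forms take a carry-in from the
 * low-order bit of c, and the *cuq/*ecuq forms return only the carry-out
 * in the low doubleword of r.
 */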
2255 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2256 {
2257 #ifdef CONFIG_INT128
2258 r->u128 = a->u128 + b->u128;
2259 #else
2260 avr_qw_add(r, *a, *b);
2261 #endif
2262 }
2263
2264 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2265 {
2266 #ifdef CONFIG_INT128
2267 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2268 #else
2269
2270 if (c->VsrD(1) & 1) {
2271 ppc_avr_t tmp;
2272
2273 tmp.VsrD(0) = 0;
2274 tmp.VsrD(1) = c->VsrD(1) & 1;
2275 avr_qw_add(&tmp, *a, tmp);
2276 avr_qw_add(r, tmp, *b);
2277 } else {
2278 avr_qw_add(r, *a, *b);
2279 }
2280 #endif
2281 }
2282
2283 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2284 {
2285 #ifdef CONFIG_INT128
2286 r->u128 = (~a->u128 < b->u128);
2287 #else
2288 ppc_avr_t not_a;
2289
2290 avr_qw_not(&not_a, *a);
2291
2292 r->VsrD(0) = 0;
2293 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2294 #endif
2295 }
2296
2297 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2298 {
2299 #ifdef CONFIG_INT128
2300 int carry_out = (~a->u128 < b->u128);
2301 if (!carry_out && (c->u128 & 1)) {
2302 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2303 ((a->u128 != 0) || (b->u128 != 0));
2304 }
2305 r->u128 = carry_out;
2306 #else
2307
2308 int carry_in = c->VsrD(1) & 1;
2309 int carry_out = 0;
2310 ppc_avr_t tmp;
2311
2312 carry_out = avr_qw_addc(&tmp, *a, *b);
2313
2314 if (!carry_out && carry_in) {
2315 ppc_avr_t one = QW_ONE;
2316 carry_out = avr_qw_addc(&tmp, tmp, one);
2317 }
2318 r->VsrD(0) = 0;
2319 r->VsrD(1) = carry_out;
2320 #endif
2321 }
2322
2323 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2324 {
2325 #ifdef CONFIG_INT128
2326 r->u128 = a->u128 - b->u128;
2327 #else
2328 ppc_avr_t tmp;
2329 ppc_avr_t one = QW_ONE;
2330
2331 avr_qw_not(&tmp, *b);
2332 avr_qw_add(&tmp, *a, tmp);
2333 avr_qw_add(r, tmp, one);
2334 #endif
2335 }
2336
2337 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2338 {
2339 #ifdef CONFIG_INT128
2340 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2341 #else
2342 ppc_avr_t tmp, sum;
2343
2344 avr_qw_not(&tmp, *b);
2345 avr_qw_add(&sum, *a, tmp);
2346
2347 tmp.VsrD(0) = 0;
2348 tmp.VsrD(1) = c->VsrD(1) & 1;
2349 avr_qw_add(r, sum, tmp);
2350 #endif
2351 }
2352
2353 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2354 {
2355 #ifdef CONFIG_INT128
2356 r->u128 = (~a->u128 < ~b->u128) ||
2357 (a->u128 + ~b->u128 == (__uint128_t)-1);
2358 #else
2359 int carry = (avr_qw_cmpu(*a, *b) > 0);
2360 if (!carry) {
2361 ppc_avr_t tmp;
2362 avr_qw_not(&tmp, *b);
2363 avr_qw_add(&tmp, *a, tmp);
2364 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2365 }
2366 r->VsrD(0) = 0;
2367 r->VsrD(1) = carry;
2368 #endif
2369 }
2370
2371 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2372 {
2373 #ifdef CONFIG_INT128
2374 r->u128 =
2375 (~a->u128 < ~b->u128) ||
2376 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2377 #else
2378 int carry_in = c->VsrD(1) & 1;
2379 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2380 if (!carry_out && carry_in) {
2381 ppc_avr_t tmp;
2382 avr_qw_not(&tmp, *b);
2383 avr_qw_add(&tmp, *a, tmp);
2384 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2385 }
2386
2387 r->VsrD(0) = 0;
2388 r->VsrD(1) = carry_out;
2389 #endif
2390 }
2391
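/*
 * Packed-decimal (BCD) helpers.  A quadword holds 31 decimal digits in
 * nibbles 31..1 with a sign code in the low-order nibble (digit 0).
 * The codes below are the recognized sign nibbles plus the '+'/'-'
 * characters of the "national" decimal format.
 */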
2392 #define BCD_PLUS_PREF_1 0xC
2393 #define BCD_PLUS_PREF_2 0xF
2394 #define BCD_PLUS_ALT_1 0xA
2395 #define BCD_NEG_PREF 0xD
2396 #define BCD_NEG_ALT 0xB
2397 #define BCD_PLUS_ALT_2 0xE
2398 #define NATIONAL_PLUS 0x2B
2399 #define NATIONAL_NEG 0x2D
2400
2401 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2402
2403 static int bcd_get_sgn(ppc_avr_t *bcd)
2404 {
2405 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2406 case BCD_PLUS_PREF_1:
2407 case BCD_PLUS_PREF_2:
2408 case BCD_PLUS_ALT_1:
2409 case BCD_PLUS_ALT_2:
2410 {
2411 return 1;
2412 }
2413
2414 case BCD_NEG_PREF:
2415 case BCD_NEG_ALT:
2416 {
2417 return -1;
2418 }
2419
2420 default:
2421 {
2422 return 0;
2423 }
2424 }
2425 }
2426
2427 static int bcd_preferred_sgn(int sgn, int ps)
2428 {
2429 if (sgn >= 0) {
2430 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2431 } else {
2432 return BCD_NEG_PREF;
2433 }
2434 }
2435
2436 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2437 {
2438 uint8_t result;
2439 if (n & 1) {
2440 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2441 } else {
2442 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2443 }
2444
2445 if (unlikely(result > 9)) {
2446 *invalid = true;
2447 }
2448 return result;
2449 }
2450
2451 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2452 {
2453 if (n & 1) {
2454 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2455 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2456 } else {
2457 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2458 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2459 }
2460 }
2461
2462 static bool bcd_is_valid(ppc_avr_t *bcd)
2463 {
2464 int i;
2465 int invalid = 0;
2466
2467 if (bcd_get_sgn(bcd) == 0) {
2468 return false;
2469 }
2470
2471 for (i = 1; i < 32; i++) {
2472 bcd_get_digit(bcd, i, &invalid);
2473 if (unlikely(invalid)) {
2474 return false;
2475 }
2476 }
2477 return true;
2478 }
2479
2480 static int bcd_cmp_zero(ppc_avr_t *bcd)
2481 {
2482 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2483 return CRF_EQ;
2484 } else {
2485 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2486 }
2487 }
2488
2489 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2490 {
2491 return reg->VsrH(7 - n);
2492 }
2493
2494 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2495 {
2496 reg->VsrH(7 - n) = val;
2497 }
2498
2499 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2500 {
2501 int i;
2502 int invalid = 0;
2503 for (i = 31; i > 0; i--) {
2504 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2505 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2506 if (unlikely(invalid)) {
2507 return 0; /* doesn't matter */
2508 } else if (dig_a > dig_b) {
2509 return 1;
2510 } else if (dig_a < dig_b) {
2511 return -1;
2512 }
2513 }
2514
2515 return 0;
2516 }
2517
2518 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2519 int *overflow)
2520 {
2521 int carry = 0;
2522 int i;
2523 int is_zero = 1;
2524
2525 for (i = 1; i <= 31; i++) {
2526 uint8_t digit = bcd_get_digit(a, i, invalid) +
2527 bcd_get_digit(b, i, invalid) + carry;
2528 is_zero &= (digit == 0);
2529 if (digit > 9) {
2530 carry = 1;
2531 digit -= 10;
2532 } else {
2533 carry = 0;
2534 }
2535
2536 bcd_put_digit(t, digit, i);
2537 }
2538
2539 *overflow = carry;
2540 return is_zero;
2541 }
2542
2543 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2544 int *overflow)
2545 {
2546 int carry = 0;
2547 int i;
2548
2549 for (i = 1; i <= 31; i++) {
2550 uint8_t digit = bcd_get_digit(a, i, invalid) -
2551 bcd_get_digit(b, i, invalid) + carry;
2552 if (digit & 0x80) {
2553 carry = -1;
2554 digit += 10;
2555 } else {
2556 carry = 0;
2557 }
2558
2559 bcd_put_digit(t, digit, i);
2560 }
2561
2562 *overflow = carry;
2563 }
2564
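/*
 * bcdadd.: signed packed-decimal addition.  Returns the CR6 field value:
 * LT/GT/EQ according to the result, with SO set on invalid operands or
 * decimal overflow.
 */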
2565 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2566 {
2567
2568 int sgna = bcd_get_sgn(a);
2569 int sgnb = bcd_get_sgn(b);
2570 int invalid = (sgna == 0) || (sgnb == 0);
2571 int overflow = 0;
2572 int zero = 0;
2573 uint32_t cr = 0;
2574 ppc_avr_t result = { .u64 = { 0, 0 } };
2575
2576 if (!invalid) {
2577 if (sgna == sgnb) {
2578 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2579 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2580 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2581 } else {
2582 int magnitude = bcd_cmp_mag(a, b);
2583 if (magnitude > 0) {
2584 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2585 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2586 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2587 } else if (magnitude < 0) {
2588 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2589 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2590 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2591 } else {
2592 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2593 cr = CRF_EQ;
2594 }
2595 }
2596 }
2597
2598 if (unlikely(invalid)) {
2599 result.VsrD(0) = result.VsrD(1) = -1;
2600 cr = CRF_SO;
2601 } else if (overflow) {
2602 cr |= CRF_SO;
2603 } else if (zero) {
2604 cr |= CRF_EQ;
2605 }
2606
2607 *r = result;
2608
2609 return cr;
2610 }
2611
2612 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2613 {
2614 ppc_avr_t bcopy = *b;
2615 int sgnb = bcd_get_sgn(b);
2616 if (sgnb < 0) {
2617 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2618 } else if (sgnb > 0) {
2619 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2620 }
2621 /* else invalid ... defer to bcdadd code for proper handling */
2622
2623 return helper_bcdadd(r, a, &bcopy, ps);
2624 }
2625
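/*
 * bcdcfn.: convert national decimal format (16-bit digit codes
 * 0x0030..0x0039 with a '+'/'-' code in the low-order halfword) in b
 * to packed decimal.
 */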
2626 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2627 {
2628 int i;
2629 int cr = 0;
2630 uint16_t national = 0;
2631 uint16_t sgnb = get_national_digit(b, 0);
2632 ppc_avr_t ret = { .u64 = { 0, 0 } };
2633 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2634
2635 for (i = 1; i < 8; i++) {
2636 national = get_national_digit(b, i);
2637 if (unlikely(national < 0x30 || national > 0x39)) {
2638 invalid = 1;
2639 break;
2640 }
2641
2642 bcd_put_digit(&ret, national & 0xf, i);
2643 }
2644
2645 if (sgnb == NATIONAL_PLUS) {
2646 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2647 } else {
2648 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2649 }
2650
2651 cr = bcd_cmp_zero(&ret);
2652
2653 if (unlikely(invalid)) {
2654 cr = CRF_SO;
2655 }
2656
2657 *r = ret;
2658
2659 return cr;
2660 }
2661
2662 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2663 {
2664 int i;
2665 int cr = 0;
2666 int sgnb = bcd_get_sgn(b);
2667 int invalid = (sgnb == 0);
2668 ppc_avr_t ret = { .u64 = { 0, 0 } };
2669
2670 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2671
2672 for (i = 1; i < 8; i++) {
2673 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2674
2675 if (unlikely(invalid)) {
2676 break;
2677 }
2678 }
2679 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2680
2681 cr = bcd_cmp_zero(b);
2682
2683 if (ox_flag) {
2684 cr |= CRF_SO;
2685 }
2686
2687 if (unlikely(invalid)) {
2688 cr = CRF_SO;
2689 }
2690
2691 *r = ret;
2692
2693 return cr;
2694 }
2695
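/*
 * bcdcfz.: convert zoned decimal format (one digit per byte, value in the
 * low nibble, zone 0x3 or 0xF selected by ps, sign encoded in the zone of
 * the low-order byte) in b to packed decimal.
 */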
2696 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2697 {
2698 int i;
2699 int cr = 0;
2700 int invalid = 0;
2701 int zone_digit = 0;
2702 int zone_lead = ps ? 0xF : 0x3;
2703 int digit = 0;
2704 ppc_avr_t ret = { .u64 = { 0, 0 } };
2705 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2706
2707 if (unlikely((sgnb < 0xA) && ps)) {
2708 invalid = 1;
2709 }
2710
2711 for (i = 0; i < 16; i++) {
2712 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2713 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2714 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2715 invalid = 1;
2716 break;
2717 }
2718
2719 bcd_put_digit(&ret, digit, i + 1);
2720 }
2721
2722 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2723 (!ps && (sgnb & 0x4))) {
2724 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2725 } else {
2726 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2727 }
2728
2729 cr = bcd_cmp_zero(&ret);
2730
2731 if (unlikely(invalid)) {
2732 cr = CRF_SO;
2733 }
2734
2735 *r = ret;
2736
2737 return cr;
2738 }
2739
2740 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2741 {
2742 int i;
2743 int cr = 0;
2744 uint8_t digit = 0;
2745 int sgnb = bcd_get_sgn(b);
2746 int zone_lead = (ps) ? 0xF0 : 0x30;
2747 int invalid = (sgnb == 0);
2748 ppc_avr_t ret = { .u64 = { 0, 0 } };
2749
2750 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2751
2752 for (i = 0; i < 16; i++) {
2753 digit = bcd_get_digit(b, i + 1, &invalid);
2754
2755 if (unlikely(invalid)) {
2756 break;
2757 }
2758
2759 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2760 }
2761
2762 if (ps) {
2763 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2764 } else {
2765 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2766 }
2767
2768 cr = bcd_cmp_zero(b);
2769
2770 if (ox_flag) {
2771 cr |= CRF_SO;
2772 }
2773
2774 if (unlikely(invalid)) {
2775 cr = CRF_SO;
2776 }
2777
2778 *r = ret;
2779
2780 return cr;
2781 }
2782
2783 /**
2784 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2785 *
2786 * Returns:
2787 * > 0 if ahi|alo > bhi|blo,
2788 * 0 if ahi|alo == bhi|blo,
2789 * < 0 if ahi|alo < bhi|blo
2790 */
2791 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2792 uint64_t blo, uint64_t bhi)
2793 {
2794 return (ahi == bhi) ?
2795 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2796 (ahi > bhi ? 1 : -1);
2797 }
2798
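/*
 * bcdcfsq.: convert the signed 128-bit binary integer in b to packed
 * decimal.  Values outside +/-(10^31 - 1) set SO and leave r unchanged.
 */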
2799 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2800 {
2801 int i;
2802 int cr;
2803 uint64_t lo_value;
2804 uint64_t hi_value;
2805 uint64_t rem;
2806 ppc_avr_t ret = { .u64 = { 0, 0 } };
2807
2808 if (b->VsrSD(0) < 0) {
2809 lo_value = -b->VsrSD(1);
2810 hi_value = ~b->VsrD(0) + !lo_value;
2811 bcd_put_digit(&ret, 0xD, 0);
2812
2813 cr = CRF_LT;
2814 } else {
2815 lo_value = b->VsrD(1);
2816 hi_value = b->VsrD(0);
2817 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2818
2819 if (hi_value == 0 && lo_value == 0) {
2820 cr = CRF_EQ;
2821 } else {
2822 cr = CRF_GT;
2823 }
2824 }
2825
2826 /*
2827 * Check src limits: abs(src) <= 10^31 - 1
2828 *
2829 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2830 */
2831 if (ucmp128(lo_value, hi_value,
2832 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2833 cr |= CRF_SO;
2834
2835 /*
2836 * According to the ISA, if src wouldn't fit in the destination
2837 * register, the result is undefined.
2838 * In that case, we leave r unchanged.
2839 */
2840 } else {
2841 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2842
2843 for (i = 1; i < 16; rem /= 10, i++) {
2844 bcd_put_digit(&ret, rem % 10, i);
2845 }
2846
2847 for (; i < 32; lo_value /= 10, i++) {
2848 bcd_put_digit(&ret, lo_value % 10, i);
2849 }
2850
2851 *r = ret;
2852 }
2853
2854 return cr;
2855 }
2856
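/* bcdctsq.: convert packed decimal in b to a signed 128-bit binary integer. */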
2857 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2858 {
2859 uint8_t i;
2860 int cr;
2861 uint64_t carry;
2862 uint64_t unused;
2863 uint64_t lo_value;
2864 uint64_t hi_value = 0;
2865 int sgnb = bcd_get_sgn(b);
2866 int invalid = (sgnb == 0);
2867
2868 lo_value = bcd_get_digit(b, 31, &invalid);
2869 for (i = 30; i > 0; i--) {
2870 mulu64(&lo_value, &carry, lo_value, 10ULL);
2871 mulu64(&hi_value, &unused, hi_value, 10ULL);
2872 lo_value += bcd_get_digit(b, i, &invalid);
2873 hi_value += carry;
2874
2875 if (unlikely(invalid)) {
2876 break;
2877 }
2878 }
2879
2880 if (sgnb == -1) {
2881 r->VsrSD(1) = -lo_value;
2882 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2883 } else {
2884 r->VsrSD(1) = lo_value;
2885 r->VsrSD(0) = hi_value;
2886 }
2887
2888 cr = bcd_cmp_zero(b);
2889
2890 if (unlikely(invalid)) {
2891 cr = CRF_SO;
2892 }
2893
2894 return cr;
2895 }
2896
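/* bcdcpsgn.: copy the digits of a into r with the sign nibble taken from b. */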
2897 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2898 {
2899 int i;
2900 int invalid = 0;
2901
2902 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2903 return CRF_SO;
2904 }
2905
2906 *r = *a;
2907 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2908
2909 for (i = 1; i < 32; i++) {
2910 bcd_get_digit(a, i, &invalid);
2911 bcd_get_digit(b, i, &invalid);
2912 if (unlikely(invalid)) {
2913 return CRF_SO;
2914 }
2915 }
2916
2917 return bcd_cmp_zero(r);
2918 }
2919
2920 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2921 {
2922 int sgnb = bcd_get_sgn(b);
2923
2924 *r = *b;
2925 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2926
2927 if (bcd_is_valid(b) == false) {
2928 return CRF_SO;
2929 }
2930
2931 return bcd_cmp_zero(r);
2932 }
2933
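/*
 * bcds.: decimal shift.  The signed byte in element 7 of a gives the number
 * of digit positions to shift b left (positive) or right (negative); the
 * preferred sign nibble is re-inserted afterwards.
 */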
2934 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2935 {
2936 int cr;
2937 int i = a->VsrSB(7);
2938 bool ox_flag = false;
2939 int sgnb = bcd_get_sgn(b);
2940 ppc_avr_t ret = *b;
2941 ret.VsrD(1) &= ~0xf;
2942
2943 if (bcd_is_valid(b) == false) {
2944 return CRF_SO;
2945 }
2946
2947 if (unlikely(i > 31)) {
2948 i = 31;
2949 } else if (unlikely(i < -31)) {
2950 i = -31;
2951 }
2952
2953 if (i > 0) {
2954 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2955 } else {
2956 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2957 }
2958 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2959
2960 *r = ret;
2961
2962 cr = bcd_cmp_zero(r);
2963 if (ox_flag) {
2964 cr |= CRF_SO;
2965 }
2966
2967 return cr;
2968 }
2969
2970 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2971 {
2972 int cr;
2973 int i;
2974 int invalid = 0;
2975 bool ox_flag = false;
2976 ppc_avr_t ret = *b;
2977
2978 for (i = 0; i < 32; i++) {
2979 bcd_get_digit(b, i, &invalid);
2980
2981 if (unlikely(invalid)) {
2982 return CRF_SO;
2983 }
2984 }
2985
2986 i = a->VsrSB(7);
2987 if (i >= 32) {
2988 ox_flag = true;
2989 ret.VsrD(1) = ret.VsrD(0) = 0;
2990 } else if (i <= -32) {
2991 ret.VsrD(1) = ret.VsrD(0) = 0;
2992 } else if (i > 0) {
2993 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2994 } else {
2995 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2996 }
2997 *r = ret;
2998
2999 cr = bcd_cmp_zero(r);
3000 if (ox_flag) {
3001 cr |= CRF_SO;
3002 }
3003
3004 return cr;
3005 }
3006
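/*
 * bcdsr.: decimal shift and round: like bcds., but a right shift rounds the
 * magnitude up when the most significant discarded digit is 5 or greater.
 */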
3007 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3008 {
3009 int cr;
3010 int unused = 0;
3011 int invalid = 0;
3012 bool ox_flag = false;
3013 int sgnb = bcd_get_sgn(b);
3014 ppc_avr_t ret = *b;
3015 ret.VsrD(1) &= ~0xf;
3016
3017 int i = a->VsrSB(7);
3018 ppc_avr_t bcd_one;
3019
3020 bcd_one.VsrD(0) = 0;
3021 bcd_one.VsrD(1) = 0x10;
3022
3023 if (bcd_is_valid(b) == false) {
3024 return CRF_SO;
3025 }
3026
3027 if (unlikely(i > 31)) {
3028 i = 31;
3029 } else if (unlikely(i < -31)) {
3030 i = -31;
3031 }
3032
3033 if (i > 0) {
3034 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
3035 } else {
3036 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
3037
3038 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
3039 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
3040 }
3041 }
3042 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3043
3044 cr = bcd_cmp_zero(&ret);
3045 if (ox_flag) {
3046 cr |= CRF_SO;
3047 }
3048 *r = ret;
3049
3050 return cr;
3051 }
3052
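/*
 * bcdtrunc.: truncate the packed-decimal value in b to the number of
 * rightmost digits given by the halfword in a, preserving the sign nibble
 * and setting SO if nonzero digits are dropped.
 */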
3053 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3054 {
3055 uint64_t mask;
3056 uint32_t ox_flag = 0;
3057 int i = a->VsrSH(3) + 1;
3058 ppc_avr_t ret = *b;
3059
3060 if (bcd_is_valid(b) == false) {
3061 return CRF_SO;
3062 }
3063
3064 if (i > 16 && i < 32) {
3065 mask = (uint64_t)-1 >> (128 - i * 4);
3066 if (ret.VsrD(0) & ~mask) {
3067 ox_flag = CRF_SO;
3068 }
3069
3070 ret.VsrD(0) &= mask;
3071 } else if (i >= 0 && i <= 16) {
3072 mask = (uint64_t)-1 >> (64 - i * 4);
3073 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3074 ox_flag = CRF_SO;
3075 }
3076
3077 ret.VsrD(1) &= mask;
3078 ret.VsrD(0) = 0;
3079 }
3080 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3081 *r = ret;
3082
3083 return bcd_cmp_zero(&ret) | ox_flag;
3084 }
3085
3086 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3087 {
3088 int i;
3089 uint64_t mask;
3090 uint32_t ox_flag = 0;
3091 int invalid = 0;
3092 ppc_avr_t ret = *b;
3093
3094 for (i = 0; i < 32; i++) {
3095 bcd_get_digit(b, i, &invalid);
3096
3097 if (unlikely(invalid)) {
3098 return CRF_SO;
3099 }
3100 }
3101
3102 i = a->VsrSH(3);
3103 if (i > 16 && i < 33) {
3104 mask = (uint64_t)-1 >> (128 - i * 4);
3105 if (ret.VsrD(0) & ~mask) {
3106 ox_flag = CRF_SO;
3107 }
3108
3109 ret.VsrD(0) &= mask;
3110 } else if (i > 0 && i <= 16) {
3111 mask = (uint64_t)-1 >> (64 - i * 4);
3112 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3113 ox_flag = CRF_SO;
3114 }
3115
3116 ret.VsrD(1) &= mask;
3117 ret.VsrD(0) = 0;
3118 } else if (i == 0) {
3119 if (ret.VsrD(0) || ret.VsrD(1)) {
3120 ox_flag = CRF_SO;
3121 }
3122 ret.VsrD(0) = ret.VsrD(1) = 0;
3123 }
3124
3125 *r = ret;
3126 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
3127 return ox_flag | CRF_EQ;
3128 }
3129
3130 return ox_flag | CRF_GT;
3131 }
3132
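/*
 * AES acceleration helpers: vsbox applies the forward S-box, vcipher[last]
 * perform one (final) encryption round and vncipher[last] one (final)
 * decryption round, using the lookup tables from crypto/aes.h.
 */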
3133 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3134 {
3135 int i;
3136 VECTOR_FOR_INORDER_I(i, u8) {
3137 r->u8[i] = AES_sbox[a->u8[i]];
3138 }
3139 }
3140
3141 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3142 {
3143 ppc_avr_t result;
3144 int i;
3145
3146 VECTOR_FOR_INORDER_I(i, u32) {
3147 result.VsrW(i) = b->VsrW(i) ^
3148 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
3149 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
3150 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
3151 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
3152 }
3153 *r = result;
3154 }
3155
3156 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3157 {
3158 ppc_avr_t result;
3159 int i;
3160
3161 VECTOR_FOR_INORDER_I(i, u8) {
3162 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
3163 }
3164 *r = result;
3165 }
3166
3167 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3168 {
3169 /* This differs from what is written in ISA V2.07. The RTL is
3170  * incorrect and will be fixed in V2.07B. */
3171 int i;
3172 ppc_avr_t tmp;
3173
3174 VECTOR_FOR_INORDER_I(i, u8) {
3175 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
3176 }
3177
3178 VECTOR_FOR_INORDER_I(i, u32) {
3179 r->VsrW(i) =
3180 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
3181 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
3182 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
3183 AES_imc[tmp.VsrB(4 * i + 3)][3];
3184 }
3185 }
3186
3187 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3188 {
3189 ppc_avr_t result;
3190 int i;
3191
3192 VECTOR_FOR_INORDER_I(i, u8) {
3193 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
3194 }
3195 *r = result;
3196 }
3197
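/*
 * vshasigmaw/vshasigmad: SHA-256 and SHA-512 sigma functions.  Bit 0x10 of
 * st_six selects the upper-case Sigma variants; the low four bits select,
 * per element, between the sigma-0 and sigma-1 forms.
 */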
3198 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3199 {
3200 int st = (st_six & 0x10) != 0;
3201 int six = st_six & 0xF;
3202 int i;
3203
3204 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3205 if (st == 0) {
3206 if ((six & (0x8 >> i)) == 0) {
3207 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3208 ror32(a->VsrW(i), 18) ^
3209 (a->VsrW(i) >> 3);
3210 } else { /* six.bit[i] == 1 */
3211 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3212 ror32(a->VsrW(i), 19) ^
3213 (a->VsrW(i) >> 10);
3214 }
3215 } else { /* st == 1 */
3216 if ((six & (0x8 >> i)) == 0) {
3217 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3218 ror32(a->VsrW(i), 13) ^
3219 ror32(a->VsrW(i), 22);
3220 } else { /* six.bit[i] == 1 */
3221 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3222 ror32(a->VsrW(i), 11) ^
3223 ror32(a->VsrW(i), 25);
3224 }
3225 }
3226 }
3227 }
3228
3229 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3230 {
3231 int st = (st_six & 0x10) != 0;
3232 int six = st_six & 0xF;
3233 int i;
3234
3235 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3236 if (st == 0) {
3237 if ((six & (0x8 >> (2 * i))) == 0) {
3238 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3239 ror64(a->VsrD(i), 8) ^
3240 (a->VsrD(i) >> 7);
3241 } else { /* six.bit[2*i] == 1 */
3242 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3243 ror64(a->VsrD(i), 61) ^
3244 (a->VsrD(i) >> 6);
3245 }
3246 } else { /* st == 1 */
3247 if ((six & (0x8 >> (2 * i))) == 0) {
3248 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3249 ror64(a->VsrD(i), 34) ^
3250 ror64(a->VsrD(i), 39);
3251 } else { /* six.bit[2*i] == 1 */
3252 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3253 ror64(a->VsrD(i), 18) ^
3254 ror64(a->VsrD(i), 41);
3255 }
3256 }
3257 }
3258 }
3259
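/*
 * vpermxor: each result byte is the XOR of one byte of a and one byte of b,
 * selected by the high and low nibbles of the corresponding byte of c.
 */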
3260 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3261 {
3262 ppc_avr_t result;
3263 int i;
3264
3265 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3266 int indexA = c->VsrB(i) >> 4;
3267 int indexB = c->VsrB(i) & 0xF;
3268
3269 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3270 }
3271 *r = result;
3272 }
3273
3274 #undef VECTOR_FOR_INORDER_I
3275
3276 /*****************************************************************************/
3277 /* SPE extension helpers */
3278 /* Use a table to make this quicker */
3279 static const uint8_t hbrev[16] = {
3280 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3281 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3282 };
3283
3284 static inline uint8_t byte_reverse(uint8_t val)
3285 {
3286 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3287 }
3288
3289 static inline uint32_t word_reverse(uint32_t val)
3290 {
3291 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3292 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3293 }
3294
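/*
 * brinc: bit-reversed increment, typically used for bit-reversed (FFT-style)
 * addressing.  Only the low MASKBITS bits, further masked by arg2, take
 * part; the remaining bits of arg1 pass through unchanged.
 */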
3295 #define MASKBITS 16 /* Number of mask bits is implementation dependent; 16 is a placeholder */
3296 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3297 {
3298 uint32_t a, b, d, mask;
3299
3300 mask = UINT32_MAX >> (32 - MASKBITS);
3301 a = arg1 & mask;
3302 b = arg2 & mask;
3303 d = word_reverse(1 + word_reverse(a | ~b));
3304 return (arg1 & ~mask) | (d & b);
3305 }
3306
3307 uint32_t helper_cntlsw32(uint32_t val)
3308 {
3309 if (val & 0x80000000) {
3310 return clz32(~val);
3311 } else {
3312 return clz32(val);
3313 }
3314 }
3315
3316 uint32_t helper_cntlzw32(uint32_t val)
3317 {
3318 return clz32(val);
3319 }
3320
3321 /* 440 specific: dlmzb determines the leftmost zero byte in high:low */
3322 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3323 target_ulong low, uint32_t update_Rc)
3324 {
3325 target_ulong mask;
3326 int i;
3327
3328 i = 1;
3329 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3330 if ((high & mask) == 0) {
3331 if (update_Rc) {
3332 env->crf[0] = 0x4;
3333 }
3334 goto done;
3335 }
3336 i++;
3337 }
3338 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3339 if ((low & mask) == 0) {
3340 if (update_Rc) {
3341 env->crf[0] = 0x8;
3342 }
3343 goto done;
3344 }
3345 i++;
3346 }
3347 i = 8;
3348 if (update_Rc) {
3349 env->crf[0] = 0x2;
3350 }
3351 done:
3352 env->xer = (env->xer & ~0x7F) | i;
3353 if (update_Rc) {
3354 env->crf[0] |= xer_so;
3355 }
3356 return i;
3357 }