1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
32
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
36
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 {
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
43 }
44 }
45
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48 {
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
68 }
69
70 return (target_ulong)rt;
71 }
72
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75 {
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
96 }
97
98 return (target_ulong)rt;
99 }
100
101 #if defined(TARGET_PPC64)
102
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 {
105 uint64_t rt = 0;
106 int overflow = 0;
107
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
113 }
114
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
117 }
118
119 return rt;
120 }
121
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 {
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
128
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
134 }
135
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
138 }
139
140 return rt;
141 }
142
143 #endif
144
145
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
150 /*
151  * Subtract 1 from each byte, AND with the inverted value, and check
152  * whether the MSB of each byte is set.
153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 */
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 {
163 return hasvalue(rb, ra) ? CRF_GT : 0;
164 }
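/*
 * Worked example (arbitrary values, for illustration only): with
 * ra = 0x2A and rb = 0x112A334455667788, pattern(0x2A) replicates the
 * byte to 0x2A2A2A2A2A2A2A2A; the XOR in hasvalue() zeroes exactly the
 * byte of rb that matched (giving 0x3B00196E7F4C5DA2), haszero()
 * detects that zero byte, and helper_cmpeqb() returns CRF_GT.
 */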
165
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
169
170 /*
171 * Return a random number.
172 */
173 uint64_t helper_darn32(void)
174 {
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
184
185 return ret;
186 }
187
188 uint64_t helper_darn64(void)
189 {
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
199
200 return ret;
201 }
202
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 {
205 int i;
206 uint64_t ra = 0;
207
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
213 }
214 }
215 }
216 return ra;
217 }
218
219 #endif
220
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 {
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234 }
235
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
239 {
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
250 }
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
254 }
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
258 }
259 return (target_long)ret;
260 }
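/*
 * Carry semantics spelled out (illustrative values): sraw of
 * 0xFFFFFFF5 (-11) by 2 yields -3 and sets CA, because the result is
 * negative and the two bits shifted out (0b01) are non-zero; sraw of
 * 0x0000000B (+11) by 2 yields 2 with CA clear, since CA is only set
 * when a negative value loses one-bits.
 */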
261
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
265 {
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
276 }
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
280 }
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
284 }
285 return ret;
286 }
287 #endif
288
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
291 {
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300 }
301
302 target_ulong helper_popcntw(target_ulong val)
303 {
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316 }
317 #else
318 target_ulong helper_popcntb(target_ulong val)
319 {
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325 }
326 #endif
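/*
 * Worked example of the byte-wise fold above (illustrative): for the
 * byte 0xB3 = 0b10110011, the pair pass gives 0x62 (pair counts
 * 1,2,0,2), the nibble pass gives 0x32 (nibble counts 3 and 2), and
 * the byte pass gives 0x05, i.e. popcount(0xB3) = 5. popcntw simply
 * keeps folding up to 32-bit words.
 */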
327
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
329 {
330 /*
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we'll handle it in
333  * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
334  * between ctz and cto, we negate the mask at the end of each iteration.
335 */
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
339
340 if (mask == 0 || mask == -1) {
341 return src;
342 }
343
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
350 }
351
352 /*
353  * Extract 'n' trailing bits of src and put them in the leading 'n'
354 * bits of 'right' or 'left', pushing down the previously extracted
355 * values.
356 */
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
362 }
363
364 /*
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
369 */
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
375 }
376
377 /*
378 * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
379  * we'll shift it a further 64 - ctpop(mask) times.
380 */
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
385 }
386
387 return left | (right >> n);
388 }
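/*
 * A bit-by-bit reference for the centrifuge operation above, closer to
 * the PowerISA description (an illustrative sketch, not used by QEMU):
 * bits of 'src' under mask ones are gathered, order preserved, into the
 * low end of the result, and bits under mask zeros into the high end.
 *
 *     static uint64_t cfuged_ref(uint64_t src, uint64_t mask)
 *     {
 *         uint64_t ones = 0, zeros = 0;
 *         int n_ones = 0;
 *
 *         if (mask == 0 || mask == UINT64_MAX) {
 *             return src;
 *         }
 *         for (int i = 63; i >= 0; i--) {
 *             uint64_t b = (src >> i) & 1;
 *             if ((mask >> i) & 1) {
 *                 ones = (ones << 1) | b;
 *                 n_ones++;
 *             } else {
 *                 zeros = (zeros << 1) | b;
 *             }
 *         }
 *         return (zeros << n_ones) | ones;
 *     }
 */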
389
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 {
392 int i, o;
393 uint64_t result = 0;
394
395 if (mask == -1) {
396 return src;
397 }
398
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
403 }
404
405 return result;
406 }
407
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 {
410 int i, o;
411 uint64_t result = 0;
412
413 if (mask == -1) {
414 return src;
415 }
416
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
421 }
422
423 return result;
424 }
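/*
 * Worked example for the deposit/extract loops above (arbitrary
 * values): with mask = 0xF0F0, PDEPD scatters the low bits of src into
 * the bit positions selected by the mask, so src = 0xAB yields 0xA0B0;
 * PEXTD is the inverse gather, so PEXTD(0xA0B0, 0xF0F0) == 0xAB.
 */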
425
426 /*****************************************************************************/
427 /* Altivec extension helpers */
428 #if defined(HOST_WORDS_BIGENDIAN)
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
435
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
439 { \
440 to_type r; \
441 \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
450 } \
451 return r; \
452 }
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
455 { \
456 to_type r; \
457 \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
463 } \
464 return r; \
465 }
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
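/*
 * For reference, SATCVT(sd, sw, ...) above expands to a clamp roughly
 * equivalent to the following (illustrative paraphrase, not extra code):
 *
 *     static inline int32_t cvtsdsw(int64_t x, int *sat)
 *     {
 *         if (x < INT32_MIN) {
 *             *sat = 1;
 *             return INT32_MIN;
 *         } else if (x > INT32_MAX) {
 *             *sat = 1;
 *             return INT32_MAX;
 *         }
 *         return x;
 *     }
 *
 * Callers record a non-zero '*sat' in VSCR[SAT].
 */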
478
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 {
481 ppc_store_vscr(env, vscr);
482 }
483
484 uint32_t helper_mfvscr(CPUPPCState *env)
485 {
486 return ppc_get_vscr(env);
487 }
488
489 static inline void set_vscr_sat(CPUPPCState *env)
490 {
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
493 }
494
495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 {
497 int i;
498
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
501 }
502 }
503
504 /* vprtybw */
505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 {
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
512 }
513 }
514
515 /* vprtybd */
516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 {
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
524 }
525 }
526
527 /* vprtybq */
528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 {
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
536 }
537
538 #define VARITHFP(suffix, func) \
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
541 { \
542 int i; \
543 \
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
546 } \
547 }
548 VARITHFP(addfp, float32_add)
549 VARITHFP(subfp, float32_sub)
550 VARITHFP(minfp, float32_min)
551 VARITHFP(maxfp, float32_max)
552 #undef VARITHFP
553
554 #define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
557 { \
558 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
562 } \
563 }
564 VARITHFPFMA(maddfp, 0);
565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566 #undef VARITHFPFMA
567
568 #define VARITHSAT_CASE(type, op, cvt, element) \
569 { \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
572 }
573
574 #define VARITHSAT_DO(name, op, optype, cvt, element) \
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 { \
578 int sat = 0; \
579 int i; \
580 \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
582 VARITHSAT_CASE(optype, op, cvt, element); \
583 } \
584 if (sat) { \
585 vscr_sat->u32[0] = 1; \
586 } \
587 }
588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600 #undef VARITHSAT_CASE
601 #undef VARITHSAT_DO
602 #undef VARITHSAT_SIGNED
603 #undef VARITHSAT_UNSIGNED
604
605 #define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 { \
608 int i; \
609 \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
613 } \
614 }
615
616 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620 VAVG(b, s8, int16_t, u8, uint16_t)
621 VAVG(h, s16, int32_t, u16, uint32_t)
622 VAVG(w, s32, int64_t, u32, uint64_t)
623 #undef VAVG_DO
624 #undef VAVG
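/*
 * The "+ 1" above makes the average round up, as the ISA requires:
 * e.g. vavgub on 5 and 8 computes (5 + 8 + 1) >> 1 = 7, and 250 and
 * 255 give (250 + 255 + 1) >> 1 = 253 without overflow, because the
 * sum is formed in the wider uint16_t type before the shift.
 * (Illustrative values only.)
 */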
625
626 #define VABSDU_DO(name, element) \
627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 int i; \
630 \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
635 } \
636 }
637
638 /*
639 * VABSDU - Vector absolute difference unsigned
640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
642 */
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645 VABSDU(b, u8)
646 VABSDU(h, u16)
647 VABSDU(w, u32)
648 #undef VABSDU_DO
649 #undef VABSDU
650
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
654 { \
655 int i; \
656 \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
660 } \
661 }
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
664 #undef VCF
665
666 #define VCMPNEZ(NAME, ELEM) \
667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 { \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
672 } \
673 }
674 VCMPNEZ(VCMPNEZB, u8)
675 VCMPNEZ(VCMPNEZH, u16)
676 VCMPNEZ(VCMPNEZW, u32)
677 #undef VCMPNEZ
678
679 #define VCMPFP_DO(suffix, compare, order, record) \
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
682 { \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
687 \
688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
689 uint32_t result; \
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
699 } \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
703 } \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
706 } \
707 }
708 #define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
711 VCMPFP(eqfp, ==, float_relation_equal)
712 VCMPFP(gefp, !=, float_relation_less)
713 VCMPFP(gtfp, ==, float_relation_greater)
714 #undef VCMPFP_DO
715 #undef VCMPFP
716
717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
719 {
720 int i;
721 int all_in = 0;
722
723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 all_in = 1;
729 } else {
730 float32 bneg = float32_chs(b->f32[i]);
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
735
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
738 }
739 }
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
742 }
743 }
744
745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 0);
748 }
749
750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
751 ppc_avr_t *b)
752 {
753 vcmpbfp_internal(env, r, a, b, 1);
754 }
755
756 #define VCT(suffix, satcvt, element) \
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
759 { \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
763 \
764 set_float_rounding_mode(float_round_to_zero, &s); \
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
767 r->element[i] = 0; \
768 } else { \
769 float64 t = float32_to_float64(b->f32[i], &s); \
770 int64_t j; \
771 \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
775 } \
776 } \
777 if (sat) { \
778 set_vscr_sat(env); \
779 } \
780 }
781 VCT(uxs, cvtsduw, u32)
782 VCT(sxs, cvtsdsw, s32)
783 #undef VCT
784
785 target_ulong helper_vclzlsbb(ppc_avr_t *r)
786 {
787 target_ulong count = 0;
788 int i;
789 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
790 if (r->VsrB(i) & 0x01) {
791 break;
792 }
793 count++;
794 }
795 return count;
796 }
797
798 target_ulong helper_vctzlsbb(ppc_avr_t *r)
799 {
800 target_ulong count = 0;
801 int i;
802 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
803 if (r->VsrB(i) & 0x01) {
804 break;
805 }
806 count++;
807 }
808 return count;
809 }
810
811 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
812 ppc_avr_t *b, ppc_avr_t *c)
813 {
814 int sat = 0;
815 int i;
816
817 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
818 int32_t prod = a->s16[i] * b->s16[i];
819 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
820
821 r->s16[i] = cvtswsh(t, &sat);
822 }
823
824 if (sat) {
825 set_vscr_sat(env);
826 }
827 }
828
829 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
830 ppc_avr_t *b, ppc_avr_t *c)
831 {
832 int sat = 0;
833 int i;
834
835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
836 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
837 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
838 r->s16[i] = cvtswsh(t, &sat);
839 }
840
841 if (sat) {
842 set_vscr_sat(env);
843 }
844 }
845
846 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
847 {
848 int i;
849
850 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
851 int32_t prod = a->s16[i] * b->s16[i];
852 r->s16[i] = (int16_t) (prod + c->s16[i]);
853 }
854 }
855
856 #define VMRG_DO(name, element, access, ofs) \
857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
858 { \
859 ppc_avr_t result; \
860 int i, half = ARRAY_SIZE(r->element) / 2; \
861 \
862 for (i = 0; i < half; i++) { \
863 result.access(i * 2 + 0) = a->access(i + ofs); \
864 result.access(i * 2 + 1) = b->access(i + ofs); \
865 } \
866 *r = result; \
867 }
868
869 #define VMRG(suffix, element, access) \
870 VMRG_DO(mrgl##suffix, element, access, half) \
871 VMRG_DO(mrgh##suffix, element, access, 0)
872 VMRG(b, u8, VsrB)
873 VMRG(h, u16, VsrH)
874 VMRG(w, u32, VsrW)
875 #undef VMRG_DO
876 #undef VMRG
877
878 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b, ppc_avr_t *c)
880 {
881 int32_t prod[16];
882 int i;
883
884 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
885 prod[i] = (int32_t)a->s8[i] * b->u8[i];
886 }
887
888 VECTOR_FOR_INORDER_I(i, s32) {
889 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
890 prod[4 * i + 2] + prod[4 * i + 3];
891 }
892 }
893
894 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
896 {
897 int32_t prod[8];
898 int i;
899
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 prod[i] = a->s16[i] * b->s16[i];
902 }
903
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
906 }
907 }
908
909 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
911 {
912 int32_t prod[8];
913 int i;
914 int sat = 0;
915
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = (int32_t)a->s16[i] * b->s16[i];
918 }
919
920 VECTOR_FOR_INORDER_I(i, s32) {
921 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922
923 r->u32[i] = cvtsdsw(t, &sat);
924 }
925
926 if (sat) {
927 set_vscr_sat(env);
928 }
929 }
930
931 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
932 ppc_avr_t *b, ppc_avr_t *c)
933 {
934 uint16_t prod[16];
935 int i;
936
937 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
938 prod[i] = a->u8[i] * b->u8[i];
939 }
940
941 VECTOR_FOR_INORDER_I(i, u32) {
942 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
943 prod[4 * i + 2] + prod[4 * i + 3];
944 }
945 }
946
947 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
949 {
950 uint32_t prod[8];
951 int i;
952
953 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
954 prod[i] = a->u16[i] * b->u16[i];
955 }
956
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
959 }
960 }
961
962 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
963 ppc_avr_t *b, ppc_avr_t *c)
964 {
965 uint32_t prod[8];
966 int i;
967 int sat = 0;
968
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
971 }
972
973 VECTOR_FOR_INORDER_I(i, s32) {
974 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975
976 r->u32[i] = cvtuduw(t, &sat);
977 }
978
979 if (sat) {
980 set_vscr_sat(env);
981 }
982 }
983
984 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
985 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 { \
987 int i; \
988 \
989 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
990 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
991 (cast)b->mul_access(i); \
992 } \
993 }
994
995 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
996 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
997 { \
998 int i; \
999 \
1000 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1001 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1002 (cast)b->mul_access(i + 1); \
1003 } \
1004 }
1005
1006 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1008 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1009 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1010 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1011 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1012 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1013 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1014 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1015 #undef VMUL_DO_EVN
1016 #undef VMUL_DO_ODD
1017 #undef VMUL
1018
1019 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1020 target_ulong uim)
1021 {
1022 int i, idx;
1023 ppc_vsr_t tmp = { .u64 = {0, 0} };
1024
1025 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1026 if ((pcv->VsrB(i) >> 5) == uim) {
1027 idx = pcv->VsrB(i) & 0x1f;
1028 if (idx < ARRAY_SIZE(t->u8)) {
1029 tmp.VsrB(i) = s0->VsrB(idx);
1030 } else {
1031 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1032 }
1033 }
1034 }
1035
1036 *t = tmp;
1037 }
1038
1039 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1040 {
1041 ppc_avr_t result;
1042 int i;
1043
1044 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1045 int s = c->VsrB(i) & 0x1f;
1046 int index = s & 0xf;
1047
1048 if (s & 0x10) {
1049 result.VsrB(i) = b->VsrB(index);
1050 } else {
1051 result.VsrB(i) = a->VsrB(index);
1052 }
1053 }
1054 *r = result;
1055 }
1056
1057 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1058 {
1059 ppc_avr_t result;
1060 int i;
1061
1062 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1063 int s = c->VsrB(i) & 0x1f;
1064 int index = 15 - (s & 0xf);
1065
1066 if (s & 0x10) {
1067 result.VsrB(i) = a->VsrB(index);
1068 } else {
1069 result.VsrB(i) = b->VsrB(index);
1070 }
1071 }
1072 *r = result;
1073 }
1074
1075 #if defined(HOST_WORDS_BIGENDIAN)
1076 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1077 #define VBPERMD_INDEX(i) (i)
1078 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1079 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1080 #else
1081 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1082 #define VBPERMD_INDEX(i) (1 - i)
1083 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1084 #define EXTRACT_BIT(avr, i, index) \
1085 (extract64((avr)->u64[1 - i], 63 - index, 1))
1086 #endif
1087
1088 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1089 {
1090 int i, j;
1091 ppc_avr_t result = { .u64 = { 0, 0 } };
1092 VECTOR_FOR_INORDER_I(i, u64) {
1093 for (j = 0; j < 8; j++) {
1094 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1095 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1096 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1097 }
1098 }
1099 }
1100 *r = result;
1101 }
1102
1103 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1104 {
1105 int i;
1106 uint64_t perm = 0;
1107
1108 VECTOR_FOR_INORDER_I(i, u8) {
1109 int index = VBPERMQ_INDEX(b, i);
1110
1111 if (index < 128) {
1112 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1113 if (a->u64[VBPERMQ_DW(index)] & mask) {
1114 perm |= (0x8000 >> i);
1115 }
1116 }
1117 }
1118
1119 r->VsrD(0) = perm;
1120 r->VsrD(1) = 0;
1121 }
1122
1123 #undef VBPERMQ_INDEX
1124 #undef VBPERMQ_DW
1125
1126 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1127 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1128 { \
1129 int i, j; \
1130 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1131 \
1132 VECTOR_FOR_INORDER_I(i, srcfld) { \
1133 prod[i] = 0; \
1134 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1135 if (a->srcfld[i] & (1ull << j)) { \
1136 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1137 } \
1138 } \
1139 } \
1140 \
1141 VECTOR_FOR_INORDER_I(i, trgfld) { \
1142 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1143 } \
1144 }
1145
1146 PMSUM(vpmsumb, u8, u16, uint16_t)
1147 PMSUM(vpmsumh, u16, u32, uint32_t)
1148 PMSUM(vpmsumw, u32, u64, uint64_t)
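/*
 * Each PMSUM lane is a carry-less (XOR) multiply over GF(2)[x]; e.g.
 * in vpmsumb, a byte pair a = 0x05 (x^2 + 1) and b = 0x03 (x + 1)
 * contributes (0x03 << 2) ^ (0x03 << 0) = 0x0F, i.e. x^3 + x^2 + x + 1,
 * to prod[], and even/odd products are then XOR-folded into each
 * halfword of the result. (Worked example with arbitrary values.)
 */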
1149
1150 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1151 {
1152
1153 #ifdef CONFIG_INT128
1154 int i, j;
1155 __uint128_t prod[2];
1156
1157 VECTOR_FOR_INORDER_I(i, u64) {
1158 prod[i] = 0;
1159 for (j = 0; j < 64; j++) {
1160 if (a->u64[i] & (1ull << j)) {
1161 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1162 }
1163 }
1164 }
1165
1166 r->u128 = prod[0] ^ prod[1];
1167
1168 #else
1169 int i, j;
1170 ppc_avr_t prod[2];
1171
1172 VECTOR_FOR_INORDER_I(i, u64) {
1173 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1174 for (j = 0; j < 64; j++) {
1175 if (a->u64[i] & (1ull << j)) {
1176 ppc_avr_t bshift;
1177 if (j == 0) {
1178 bshift.VsrD(0) = 0;
1179 bshift.VsrD(1) = b->u64[i];
1180 } else {
1181 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1182 bshift.VsrD(1) = b->u64[i] << j;
1183 }
1184 prod[i].VsrD(1) ^= bshift.VsrD(1);
1185 prod[i].VsrD(0) ^= bshift.VsrD(0);
1186 }
1187 }
1188 }
1189
1190 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1191 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1192 #endif
1193 }
1194
1195
1196 #if defined(HOST_WORDS_BIGENDIAN)
1197 #define PKBIG 1
1198 #else
1199 #define PKBIG 0
1200 #endif
1201 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1202 {
1203 int i, j;
1204 ppc_avr_t result;
1205 #if defined(HOST_WORDS_BIGENDIAN)
1206 const ppc_avr_t *x[2] = { a, b };
1207 #else
1208 const ppc_avr_t *x[2] = { b, a };
1209 #endif
1210
1211 VECTOR_FOR_INORDER_I(i, u64) {
1212 VECTOR_FOR_INORDER_I(j, u32) {
1213 uint32_t e = x[i]->u32[j];
1214
1215 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1216 ((e >> 6) & 0x3e0) |
1217 ((e >> 3) & 0x1f));
1218 }
1219 }
1220 *r = result;
1221 }
1222
1223 #define VPK(suffix, from, to, cvt, dosat) \
1224 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1225 ppc_avr_t *a, ppc_avr_t *b) \
1226 { \
1227 int i; \
1228 int sat = 0; \
1229 ppc_avr_t result; \
1230 ppc_avr_t *a0 = PKBIG ? a : b; \
1231 ppc_avr_t *a1 = PKBIG ? b : a; \
1232 \
1233 VECTOR_FOR_INORDER_I(i, from) { \
1234 result.to[i] = cvt(a0->from[i], &sat); \
1235 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1236 } \
1237 *r = result; \
1238 if (dosat && sat) { \
1239 set_vscr_sat(env); \
1240 } \
1241 }
1242 #define I(x, y) (x)
1243 VPK(shss, s16, s8, cvtshsb, 1)
1244 VPK(shus, s16, u8, cvtshub, 1)
1245 VPK(swss, s32, s16, cvtswsh, 1)
1246 VPK(swus, s32, u16, cvtswuh, 1)
1247 VPK(sdss, s64, s32, cvtsdsw, 1)
1248 VPK(sdus, s64, u32, cvtsduw, 1)
1249 VPK(uhus, u16, u8, cvtuhub, 1)
1250 VPK(uwus, u32, u16, cvtuwuh, 1)
1251 VPK(udus, u64, u32, cvtuduw, 1)
1252 VPK(uhum, u16, u8, I, 0)
1253 VPK(uwum, u32, u16, I, 0)
1254 VPK(udum, u64, u32, I, 0)
1255 #undef I
1256 #undef VPK
1257 #undef PKBIG
1258
1259 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1260 {
1261 int i;
1262
1263 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1264 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1265 }
1266 }
1267
1268 #define VRFI(suffix, rounding) \
1269 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1270 ppc_avr_t *b) \
1271 { \
1272 int i; \
1273 float_status s = env->vec_status; \
1274 \
1275 set_float_rounding_mode(rounding, &s); \
1276 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1277 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1278 } \
1279 }
1280 VRFI(n, float_round_nearest_even)
1281 VRFI(m, float_round_down)
1282 VRFI(p, float_round_up)
1283 VRFI(z, float_round_to_zero)
1284 #undef VRFI
1285
1286 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1287 {
1288 int i;
1289
1290 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1291 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1292
1293 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1294 }
1295 }
1296
1297 #define VRLMI(name, size, element, insert) \
1298 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1299 { \
1300 int i; \
1301 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1302 uint##size##_t src1 = a->element[i]; \
1303 uint##size##_t src2 = b->element[i]; \
1304 uint##size##_t src3 = r->element[i]; \
1305 uint##size##_t begin, end, shift, mask, rot_val; \
1306 \
1307 shift = extract##size(src2, 0, 6); \
1308 end = extract##size(src2, 8, 6); \
1309 begin = extract##size(src2, 16, 6); \
1310 rot_val = rol##size(src1, shift); \
1311 mask = mask_u##size(begin, end); \
1312 if (insert) { \
1313 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1314 } else { \
1315 r->element[i] = (rot_val & mask); \
1316 } \
1317 } \
1318 }
1319
1320 VRLMI(VRLDMI, 64, u64, 1);
1321 VRLMI(VRLWMI, 32, u32, 1);
1322 VRLMI(VRLDNM, 64, u64, 0);
1323 VRLMI(VRLWNM, 32, u32, 0);
1324
1325 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1326 {
1327 int i;
1328
1329 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1330 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1331 }
1332 }
1333
1334 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1335 {
1336 int i;
1337
1338 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1339 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1340 }
1341 }
1342
1343 #define VEXTU_X_DO(name, size, left) \
1344 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1345 { \
1346 int index = (a & 0xf) * 8; \
1347 if (left) { \
1348 index = 128 - index - size; \
1349 } \
1350 return int128_getlo(int128_rshift(b->s128, index)) & \
1351 MAKE_64BIT_MASK(0, size); \
1352 }
1353 VEXTU_X_DO(vextublx, 8, 1)
1354 VEXTU_X_DO(vextuhlx, 16, 1)
1355 VEXTU_X_DO(vextuwlx, 32, 1)
1356 VEXTU_X_DO(vextubrx, 8, 0)
1357 VEXTU_X_DO(vextuhrx, 16, 0)
1358 VEXTU_X_DO(vextuwrx, 32, 0)
1359 #undef VEXTU_X_DO
1360
1361 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1362 {
1363 int i;
1364 unsigned int shift, bytes, size;
1365
1366 size = ARRAY_SIZE(r->u8);
1367 for (i = 0; i < size; i++) {
1368 shift = b->VsrB(i) & 0x7; /* extract shift value */
1369 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1370 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1371 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1372 }
1373 }
1374
1375 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1376 {
1377 int i;
1378 unsigned int shift, bytes;
1379
1380 /*
1381  * Use reverse order, as the destination and source registers can be
1382  * the same. The register is modified in place, saving a temporary;
1383  * reverse order guarantees a computed result is not fed back.
1384 */
1385 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1386 shift = b->VsrB(i) & 0x7; /* extract shift value */
1387 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1388 /* extract adjacent bytes */
1389 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1390 }
1391 }
1392
1393 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1394 {
1395 int sh = shift & 0xf;
1396 int i;
1397 ppc_avr_t result;
1398
1399 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1400 int index = sh + i;
1401 if (index > 0xf) {
1402 result.VsrB(i) = b->VsrB(index - 0x10);
1403 } else {
1404 result.VsrB(i) = a->VsrB(index);
1405 }
1406 }
1407 *r = result;
1408 }
1409
1410 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1411 {
1412 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1413
1414 #if defined(HOST_WORDS_BIGENDIAN)
1415 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1416 memset(&r->u8[16 - sh], 0, sh);
1417 #else
1418 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1419 memset(&r->u8[0], 0, sh);
1420 #endif
1421 }
1422
1423 #if defined(HOST_WORDS_BIGENDIAN)
1424 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1425 #else
1426 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1427 #endif
1428
1429 #define VINSX(SUFFIX, TYPE) \
1430 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1431 uint64_t val, target_ulong index) \
1432 { \
1433 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1434 target_long idx = index; \
1435 \
1436 if (idx < 0 || idx > maxidx) { \
1437 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1438 qemu_log_mask(LOG_GUEST_ERROR, \
1439 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1440 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1441 } else { \
1442 TYPE src = val; \
1443 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1444 } \
1445 }
1446 VINSX(B, uint8_t)
1447 VINSX(H, uint16_t)
1448 VINSX(W, uint32_t)
1449 VINSX(D, uint64_t)
1450 #undef ELEM_ADDR
1451 #undef VINSX
1452 #if defined(HOST_WORDS_BIGENDIAN)
1453 #define VEXTDVLX(NAME, SIZE) \
1454 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1455 target_ulong index) \
1456 { \
1457 const target_long idx = index; \
1458 ppc_avr_t tmp[2] = { *a, *b }; \
1459 memset(t, 0, sizeof(*t)); \
1460 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1461 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1462 } else { \
1463 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1464 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1465 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1466 } \
1467 }
1468 #else
1469 #define VEXTDVLX(NAME, SIZE) \
1470 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1471 target_ulong index) \
1472 { \
1473 const target_long idx = index; \
1474 ppc_avr_t tmp[2] = { *b, *a }; \
1475 memset(t, 0, sizeof(*t)); \
1476 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1477 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1478 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1479 } else { \
1480 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1481 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1482 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1483 } \
1484 }
1485 #endif
1486 VEXTDVLX(VEXTDUBVLX, 1)
1487 VEXTDVLX(VEXTDUHVLX, 2)
1488 VEXTDVLX(VEXTDUWVLX, 4)
1489 VEXTDVLX(VEXTDDVLX, 8)
1490 #undef VEXTDVLX
1491 #if defined(HOST_WORDS_BIGENDIAN)
1492 #define VEXTRACT(suffix, element) \
1493 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1494 { \
1495 uint32_t es = sizeof(r->element[0]); \
1496 memmove(&r->u8[8 - es], &b->u8[index], es); \
1497 memset(&r->u8[8], 0, 8); \
1498 memset(&r->u8[0], 0, 8 - es); \
1499 }
1500 #else
1501 #define VEXTRACT(suffix, element) \
1502 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1503 { \
1504 uint32_t es = sizeof(r->element[0]); \
1505 uint32_t s = (16 - index) - es; \
1506 memmove(&r->u8[8], &b->u8[s], es); \
1507 memset(&r->u8[0], 0, 8); \
1508 memset(&r->u8[8 + es], 0, 8 - es); \
1509 }
1510 #endif
1511 VEXTRACT(ub, u8)
1512 VEXTRACT(uh, u16)
1513 VEXTRACT(uw, u32)
1514 VEXTRACT(d, u64)
1515 #undef VEXTRACT
1516
1517 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1518 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1519 { \
1520 int i, idx, crf = 0; \
1521 \
1522 for (i = 0; i < NUM_ELEMS; i++) { \
1523 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1524 if (b->Vsr##ELEM(idx)) { \
1525 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1526 } else { \
1527 crf = 0b0010; \
1528 break; \
1529 } \
1530 } \
1531 \
1532 for (; i < NUM_ELEMS; i++) { \
1533 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1534 t->Vsr##ELEM(idx) = 0; \
1535 } \
1536 \
1537 return crf; \
1538 }
1539 VSTRI(VSTRIBL, B, 16, true)
1540 VSTRI(VSTRIBR, B, 16, false)
1541 VSTRI(VSTRIHL, H, 8, true)
1542 VSTRI(VSTRIHR, H, 8, false)
1543 #undef VSTRI
1544
1545 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1546 ppc_vsr_t *xb, uint32_t index)
1547 {
1548 ppc_vsr_t t = { };
1549 size_t es = sizeof(uint32_t);
1550 uint32_t ext_index;
1551 int i;
1552
1553 ext_index = index;
1554 for (i = 0; i < es; i++, ext_index++) {
1555 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1556 }
1557
1558 *xt = t;
1559 }
1560
1561 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1562 ppc_vsr_t *xb, uint32_t index)
1563 {
1564 ppc_vsr_t t = *xt;
1565 size_t es = sizeof(uint32_t);
1566 int ins_index, i = 0;
1567
1568 ins_index = index;
1569 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1570 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1571 }
1572
1573 *xt = t;
1574 }
1575
1576 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1577 uint32_t desc)
1578 {
1579 /*
1580 * Instead of processing imm bit-by-bit, we'll skip the computation of
1581 * conjunctions whose corresponding bit is unset.
1582 */
1583 int bit, imm = simd_data(desc);
1584 Int128 conj, disj = int128_zero();
1585
1586 /* Iterate over set bits from the least to the most significant bit */
1587 while (imm) {
1588 /*
1589  * Get the next bit to be processed with ctzl. Invert the result of
1590  * ctzl to match the indexing used by PowerISA.
1591 */
1592 bit = 7 - ctzl(imm);
1593 if (bit & 0x4) {
1594 conj = a->s128;
1595 } else {
1596 conj = int128_not(a->s128);
1597 }
1598 if (bit & 0x2) {
1599 conj = int128_and(conj, b->s128);
1600 } else {
1601 conj = int128_and(conj, int128_not(b->s128));
1602 }
1603 if (bit & 0x1) {
1604 conj = int128_and(conj, c->s128);
1605 } else {
1606 conj = int128_and(conj, int128_not(c->s128));
1607 }
1608 disj = int128_or(disj, conj);
1609
1610 /* Unset the least significant bit that is set */
1611 imm &= imm - 1;
1612 }
1613
1614 t->s128 = disj;
1615 }
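/*
 * Worked example (illustrative, following the bit numbering above):
 * imm = 0x01 sets only the conjunction for A & B & C, so xxeval
 * reduces to a three-way AND, while imm = 0x7F selects every minterm
 * except ~A & ~B & ~C and therefore reduces to a three-way OR.
 */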
1616
1617 #define XXBLEND(name, sz) \
1618 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1619 ppc_avr_t *c, uint32_t desc) \
1620 { \
1621 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1622 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1623 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1624 } \
1625 }
1626 XXBLEND(B, 8)
1627 XXBLEND(H, 16)
1628 XXBLEND(W, 32)
1629 XXBLEND(D, 64)
1630 #undef XXBLEND
1631
1632 #define VNEG(name, element) \
1633 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1634 { \
1635 int i; \
1636 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1637 r->element[i] = -b->element[i]; \
1638 } \
1639 }
1640 VNEG(vnegw, s32)
1641 VNEG(vnegd, s64)
1642 #undef VNEG
1643
1644 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1645 {
1646 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1647
1648 #if defined(HOST_WORDS_BIGENDIAN)
1649 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1650 memset(&r->u8[0], 0, sh);
1651 #else
1652 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1653 memset(&r->u8[16 - sh], 0, sh);
1654 #endif
1655 }
1656
1657 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1658 {
1659 int i;
1660
1661 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1662 r->u32[i] = a->u32[i] >= b->u32[i];
1663 }
1664 }
1665
1666 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1667 {
1668 int64_t t;
1669 int i, upper;
1670 ppc_avr_t result;
1671 int sat = 0;
1672
1673 upper = ARRAY_SIZE(r->s32) - 1;
1674 t = (int64_t)b->VsrSW(upper);
1675 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1676 t += a->VsrSW(i);
1677 result.VsrSW(i) = 0;
1678 }
1679 result.VsrSW(upper) = cvtsdsw(t, &sat);
1680 *r = result;
1681
1682 if (sat) {
1683 set_vscr_sat(env);
1684 }
1685 }
1686
1687 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1688 {
1689 int i, j, upper;
1690 ppc_avr_t result;
1691 int sat = 0;
1692
1693 upper = 1;
1694 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1695 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1696
1697 result.VsrD(i) = 0;
1698 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1699 t += a->VsrSW(2 * i + j);
1700 }
1701 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1702 }
1703
1704 *r = result;
1705 if (sat) {
1706 set_vscr_sat(env);
1707 }
1708 }
1709
1710 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1711 {
1712 int i, j;
1713 int sat = 0;
1714
1715 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1716 int64_t t = (int64_t)b->s32[i];
1717
1718 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1719 t += a->s8[4 * i + j];
1720 }
1721 r->s32[i] = cvtsdsw(t, &sat);
1722 }
1723
1724 if (sat) {
1725 set_vscr_sat(env);
1726 }
1727 }
1728
1729 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1730 {
1731 int sat = 0;
1732 int i;
1733
1734 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1735 int64_t t = (int64_t)b->s32[i];
1736
1737 t += a->s16[2 * i] + a->s16[2 * i + 1];
1738 r->s32[i] = cvtsdsw(t, &sat);
1739 }
1740
1741 if (sat) {
1742 set_vscr_sat(env);
1743 }
1744 }
1745
1746 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1747 {
1748 int i, j;
1749 int sat = 0;
1750
1751 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1752 uint64_t t = (uint64_t)b->u32[i];
1753
1754 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1755 t += a->u8[4 * i + j];
1756 }
1757 r->u32[i] = cvtuduw(t, &sat);
1758 }
1759
1760 if (sat) {
1761 set_vscr_sat(env);
1762 }
1763 }
1764
1765 #if defined(HOST_WORDS_BIGENDIAN)
1766 #define UPKHI 1
1767 #define UPKLO 0
1768 #else
1769 #define UPKHI 0
1770 #define UPKLO 1
1771 #endif
1772 #define VUPKPX(suffix, hi) \
1773 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1774 { \
1775 int i; \
1776 ppc_avr_t result; \
1777 \
1778 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1779 uint16_t e = b->u16[hi ? i : i + 4]; \
1780 uint8_t a = (e >> 15) ? 0xff : 0; \
1781 uint8_t r = (e >> 10) & 0x1f; \
1782 uint8_t g = (e >> 5) & 0x1f; \
1783 uint8_t b = e & 0x1f; \
1784 \
1785 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1786 } \
1787 *r = result; \
1788 }
1789 VUPKPX(lpx, UPKLO)
1790 VUPKPX(hpx, UPKHI)
1791 #undef VUPKPX
1792
1793 #define VUPK(suffix, unpacked, packee, hi) \
1794 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1795 { \
1796 int i; \
1797 ppc_avr_t result; \
1798 \
1799 if (hi) { \
1800 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1801 result.unpacked[i] = b->packee[i]; \
1802 } \
1803 } else { \
1804 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1805 i++) { \
1806 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1807 } \
1808 } \
1809 *r = result; \
1810 }
1811 VUPK(hsb, s16, s8, UPKHI)
1812 VUPK(hsh, s32, s16, UPKHI)
1813 VUPK(hsw, s64, s32, UPKHI)
1814 VUPK(lsb, s16, s8, UPKLO)
1815 VUPK(lsh, s32, s16, UPKLO)
1816 VUPK(lsw, s64, s32, UPKLO)
1817 #undef VUPK
1818 #undef UPKHI
1819 #undef UPKLO
1820
1821 #define VGENERIC_DO(name, element) \
1822 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1823 { \
1824 int i; \
1825 \
1826 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1827 r->element[i] = name(b->element[i]); \
1828 } \
1829 }
1830
1831 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1832 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1833
1834 VGENERIC_DO(clzb, u8)
1835 VGENERIC_DO(clzh, u16)
1836
1837 #undef clzb
1838 #undef clzh
1839
1840 #define ctzb(v) ((v) ? ctz32(v) : 8)
1841 #define ctzh(v) ((v) ? ctz32(v) : 16)
1842 #define ctzw(v) ctz32((v))
1843 #define ctzd(v) ctz64((v))
1844
1845 VGENERIC_DO(ctzb, u8)
1846 VGENERIC_DO(ctzh, u16)
1847 VGENERIC_DO(ctzw, u32)
1848 VGENERIC_DO(ctzd, u64)
1849
1850 #undef ctzb
1851 #undef ctzh
1852 #undef ctzw
1853 #undef ctzd
1854
1855 #define popcntb(v) ctpop8(v)
1856 #define popcnth(v) ctpop16(v)
1857 #define popcntw(v) ctpop32(v)
1858 #define popcntd(v) ctpop64(v)
1859
1860 VGENERIC_DO(popcntb, u8)
1861 VGENERIC_DO(popcnth, u16)
1862 VGENERIC_DO(popcntw, u32)
1863 VGENERIC_DO(popcntd, u64)
1864
1865 #undef popcntb
1866 #undef popcnth
1867 #undef popcntw
1868 #undef popcntd
1869
1870 #undef VGENERIC_DO
1871
1872 #if defined(HOST_WORDS_BIGENDIAN)
1873 #define QW_ONE { .u64 = { 0, 1 } }
1874 #else
1875 #define QW_ONE { .u64 = { 1, 0 } }
1876 #endif
1877
1878 #ifndef CONFIG_INT128
1879
1880 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1881 {
1882 t->u64[0] = ~a.u64[0];
1883 t->u64[1] = ~a.u64[1];
1884 }
1885
1886 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1887 {
1888 if (a.VsrD(0) < b.VsrD(0)) {
1889 return -1;
1890 } else if (a.VsrD(0) > b.VsrD(0)) {
1891 return 1;
1892 } else if (a.VsrD(1) < b.VsrD(1)) {
1893 return -1;
1894 } else if (a.VsrD(1) > b.VsrD(1)) {
1895 return 1;
1896 } else {
1897 return 0;
1898 }
1899 }
1900
1901 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1902 {
1903 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1904 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1905 (~a.VsrD(1) < b.VsrD(1));
1906 }
1907
1908 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1909 {
1910 ppc_avr_t not_a;
1911 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1912 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1913 (~a.VsrD(1) < b.VsrD(1));
1914 avr_qw_not(&not_a, a);
1915 return avr_qw_cmpu(not_a, b) < 0;
1916 }
1917
1918 #endif
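/*
 * The carry term (~a.VsrD(1) < b.VsrD(1)) used above relies on the
 * identity that an unsigned addition a + b wraps exactly when
 * b > ~a, i.e. b > UINT64_MAX - a; e.g. a = 0xFFFFFFFFFFFFFFFE and
 * b = 3 give ~a = 1 < 3, so a carry of 1 is propagated into the high
 * doubleword. (Illustrative note, values chosen arbitrarily.)
 */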
1919
1920 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1921 {
1922 #ifdef CONFIG_INT128
1923 r->u128 = a->u128 + b->u128;
1924 #else
1925 avr_qw_add(r, *a, *b);
1926 #endif
1927 }
1928
1929 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1930 {
1931 #ifdef CONFIG_INT128
1932 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1933 #else
1934
1935 if (c->VsrD(1) & 1) {
1936 ppc_avr_t tmp;
1937
1938 tmp.VsrD(0) = 0;
1939 tmp.VsrD(1) = c->VsrD(1) & 1;
1940 avr_qw_add(&tmp, *a, tmp);
1941 avr_qw_add(r, tmp, *b);
1942 } else {
1943 avr_qw_add(r, *a, *b);
1944 }
1945 #endif
1946 }
1947
1948 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1949 {
1950 #ifdef CONFIG_INT128
1951 r->u128 = (~a->u128 < b->u128);
1952 #else
1953 ppc_avr_t not_a;
1954
1955 avr_qw_not(&not_a, *a);
1956
1957 r->VsrD(0) = 0;
1958 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1959 #endif
1960 }
1961
1962 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1963 {
1964 #ifdef CONFIG_INT128
1965 int carry_out = (~a->u128 < b->u128);
1966 if (!carry_out && (c->u128 & 1)) {
1967 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1968 ((a->u128 != 0) || (b->u128 != 0));
1969 }
1970 r->u128 = carry_out;
1971 #else
1972
1973 int carry_in = c->VsrD(1) & 1;
1974 int carry_out = 0;
1975 ppc_avr_t tmp;
1976
1977 carry_out = avr_qw_addc(&tmp, *a, *b);
1978
1979 if (!carry_out && carry_in) {
1980 ppc_avr_t one = QW_ONE;
1981 carry_out = avr_qw_addc(&tmp, tmp, one);
1982 }
1983 r->VsrD(0) = 0;
1984 r->VsrD(1) = carry_out;
1985 #endif
1986 }
1987
1988 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1989 {
1990 #ifdef CONFIG_INT128
1991 r->u128 = a->u128 - b->u128;
1992 #else
1993 ppc_avr_t tmp;
1994 ppc_avr_t one = QW_ONE;
1995
1996 avr_qw_not(&tmp, *b);
1997 avr_qw_add(&tmp, *a, tmp);
1998 avr_qw_add(r, tmp, one);
1999 #endif
2000 }
2001
2002 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2003 {
2004 #ifdef CONFIG_INT128
2005 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2006 #else
2007 ppc_avr_t tmp, sum;
2008
2009 avr_qw_not(&tmp, *b);
2010 avr_qw_add(&sum, *a, tmp);
2011
2012 tmp.VsrD(0) = 0;
2013 tmp.VsrD(1) = c->VsrD(1) & 1;
2014 avr_qw_add(r, sum, tmp);
2015 #endif
2016 }
2017
2018 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2019 {
2020 #ifdef CONFIG_INT128
2021 r->u128 = (~a->u128 < ~b->u128) ||
2022 (a->u128 + ~b->u128 == (__uint128_t)-1);
2023 #else
2024 int carry = (avr_qw_cmpu(*a, *b) > 0);
2025 if (!carry) {
2026 ppc_avr_t tmp;
2027 avr_qw_not(&tmp, *b);
2028 avr_qw_add(&tmp, *a, tmp);
2029 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2030 }
2031 r->VsrD(0) = 0;
2032 r->VsrD(1) = carry;
2033 #endif
2034 }
2035
2036 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2037 {
2038 #ifdef CONFIG_INT128
2039 r->u128 =
2040 (~a->u128 < ~b->u128) ||
2041 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2042 #else
2043 int carry_in = c->VsrD(1) & 1;
2044 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2045 if (!carry_out && carry_in) {
2046 ppc_avr_t tmp;
2047 avr_qw_not(&tmp, *b);
2048 avr_qw_add(&tmp, *a, tmp);
2049 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2050 }
2051
2052 r->VsrD(0) = 0;
2053 r->VsrD(1) = carry_out;
2054 #endif
2055 }
2056
2057 #define BCD_PLUS_PREF_1 0xC
2058 #define BCD_PLUS_PREF_2 0xF
2059 #define BCD_PLUS_ALT_1 0xA
2060 #define BCD_NEG_PREF 0xD
2061 #define BCD_NEG_ALT 0xB
2062 #define BCD_PLUS_ALT_2 0xE
2063 #define NATIONAL_PLUS 0x2B
2064 #define NATIONAL_NEG 0x2D
2065
2066 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2067
2068 static int bcd_get_sgn(ppc_avr_t *bcd)
2069 {
2070 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2071 case BCD_PLUS_PREF_1:
2072 case BCD_PLUS_PREF_2:
2073 case BCD_PLUS_ALT_1:
2074 case BCD_PLUS_ALT_2:
2075 {
2076 return 1;
2077 }
2078
2079 case BCD_NEG_PREF:
2080 case BCD_NEG_ALT:
2081 {
2082 return -1;
2083 }
2084
2085 default:
2086 {
2087 return 0;
2088 }
2089 }
2090 }
2091
2092 static int bcd_preferred_sgn(int sgn, int ps)
2093 {
2094 if (sgn >= 0) {
2095 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2096 } else {
2097 return BCD_NEG_PREF;
2098 }
2099 }
2100
2101 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2102 {
2103 uint8_t result;
2104 if (n & 1) {
2105 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2106 } else {
2107 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2108 }
2109
2110 if (unlikely(result > 9)) {
2111 *invalid = true;
2112 }
2113 return result;
2114 }
2115
2116 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2117 {
2118 if (n & 1) {
2119 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2120 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2121 } else {
2122 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2123 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2124 }
2125 }
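/*
 * Layout reminder (worked example, value chosen arbitrarily): digit n
 * lives in byte BCD_DIG_BYTE(n) = 15 - n/2, in the high nibble when n
 * is odd and the low nibble when n is even; digit 0 is the sign code.
 * So +123 with the preferred plus sign is the nibble string ...0123C,
 * and bcd_get_digit() returns 3 for n = 1 and 1 for n = 3.
 */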
2126
2127 static bool bcd_is_valid(ppc_avr_t *bcd)
2128 {
2129 int i;
2130 int invalid = 0;
2131
2132 if (bcd_get_sgn(bcd) == 0) {
2133 return false;
2134 }
2135
2136 for (i = 1; i < 32; i++) {
2137 bcd_get_digit(bcd, i, &invalid);
2138 if (unlikely(invalid)) {
2139 return false;
2140 }
2141 }
2142 return true;
2143 }
2144
2145 static int bcd_cmp_zero(ppc_avr_t *bcd)
2146 {
2147 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2148 return CRF_EQ;
2149 } else {
2150 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2151 }
2152 }
2153
2154 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2155 {
2156 return reg->VsrH(7 - n);
2157 }
2158
2159 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2160 {
2161 reg->VsrH(7 - n) = val;
2162 }
2163
2164 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2165 {
2166 int i;
2167 int invalid = 0;
2168 for (i = 31; i > 0; i--) {
2169 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2170 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2171 if (unlikely(invalid)) {
2172 return 0; /* doesn't matter */
2173 } else if (dig_a > dig_b) {
2174 return 1;
2175 } else if (dig_a < dig_b) {
2176 return -1;
2177 }
2178 }
2179
2180 return 0;
2181 }
2182
2183 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2184 int *overflow)
2185 {
2186 int carry = 0;
2187 int i;
2188 int is_zero = 1;
2189
2190 for (i = 1; i <= 31; i++) {
2191 uint8_t digit = bcd_get_digit(a, i, invalid) +
2192 bcd_get_digit(b, i, invalid) + carry;
2193 is_zero &= (digit == 0);
2194 if (digit > 9) {
2195 carry = 1;
2196 digit -= 10;
2197 } else {
2198 carry = 0;
2199 }
2200
2201 bcd_put_digit(t, digit, i);
2202 }
2203
2204 *overflow = carry;
2205 return is_zero;
2206 }
2207
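/*
 * Subtract the magnitude of b from that of a digit by digit with decimal
 * borrow.  Callers pass the larger magnitude as a; *overflow receives any
 * final borrow.
 */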
2208 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2209 int *overflow)
2210 {
2211 int carry = 0;
2212 int i;
2213
2214 for (i = 1; i <= 31; i++) {
2215 uint8_t digit = bcd_get_digit(a, i, invalid) -
2216 bcd_get_digit(b, i, invalid) + carry;
2217 if (digit & 0x80) {
2218 carry = -1;
2219 digit += 10;
2220 } else {
2221 carry = 0;
2222 }
2223
2224 bcd_put_digit(t, digit, i);
2225 }
2226
2227 *overflow = carry;
2228 }
2229
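/*
 * bcdadd. - signed BCD add.  Operands of equal sign have their magnitudes
 * added; for mixed signs the smaller magnitude is subtracted from the
 * larger and the result takes the sign of the larger.  The return value is
 * the CR field: LT/GT/EQ from the result, SO on overflow or invalid input.
 */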
2230 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2231 {
2232
2233 int sgna = bcd_get_sgn(a);
2234 int sgnb = bcd_get_sgn(b);
2235 int invalid = (sgna == 0) || (sgnb == 0);
2236 int overflow = 0;
2237 int zero = 0;
2238 uint32_t cr = 0;
2239 ppc_avr_t result = { .u64 = { 0, 0 } };
2240
2241 if (!invalid) {
2242 if (sgna == sgnb) {
2243 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2244 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2245 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2246 } else {
2247 int magnitude = bcd_cmp_mag(a, b);
2248 if (magnitude > 0) {
2249 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2250 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2251 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2252 } else if (magnitude < 0) {
2253 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2254 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2255 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2256 } else {
2257 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2258 cr = CRF_EQ;
2259 }
2260 }
2261 }
2262
2263 if (unlikely(invalid)) {
2264 result.VsrD(0) = result.VsrD(1) = -1;
2265 cr = CRF_SO;
2266 } else if (overflow) {
2267 cr |= CRF_SO;
2268 } else if (zero) {
2269 cr |= CRF_EQ;
2270 }
2271
2272 *r = result;
2273
2274 return cr;
2275 }
2276
2277 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2278 {
2279 ppc_avr_t bcopy = *b;
2280 int sgnb = bcd_get_sgn(b);
2281 if (sgnb < 0) {
2282 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2283 } else if (sgnb > 0) {
2284 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2285 }
2286 /* else invalid ... defer to bcdadd code for proper handling */
2287
2288 return helper_bcdadd(r, a, &bcopy, ps);
2289 }
2290
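/*
 * bcdcfn. - Decimal Convert From National: the source holds 16-bit
 * '0'..'9' characters with a 16-bit '+'/'-' sign character in the low
 * halfword; the result is signed BCD.
 */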
2291 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2292 {
2293 int i;
2294 int cr = 0;
2295 uint16_t national = 0;
2296 uint16_t sgnb = get_national_digit(b, 0);
2297 ppc_avr_t ret = { .u64 = { 0, 0 } };
2298 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2299
2300 for (i = 1; i < 8; i++) {
2301 national = get_national_digit(b, i);
2302 if (unlikely(national < 0x30 || national > 0x39)) {
2303 invalid = 1;
2304 break;
2305 }
2306
2307 bcd_put_digit(&ret, national & 0xf, i);
2308 }
2309
2310 if (sgnb == NATIONAL_PLUS) {
2311 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2312 } else {
2313 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2314 }
2315
2316 cr = bcd_cmp_zero(&ret);
2317
2318 if (unlikely(invalid)) {
2319 cr = CRF_SO;
2320 }
2321
2322 *r = ret;
2323
2324 return cr;
2325 }
2326
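/*
 * bcdctn. - Decimal Convert To National.  SO is set when the source has
 * more than seven significant digits or is not valid BCD.
 */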
2327 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2328 {
2329 int i;
2330 int cr = 0;
2331 int sgnb = bcd_get_sgn(b);
2332 int invalid = (sgnb == 0);
2333 ppc_avr_t ret = { .u64 = { 0, 0 } };
2334
2335 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2336
2337 for (i = 1; i < 8; i++) {
2338 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2339
2340 if (unlikely(invalid)) {
2341 break;
2342 }
2343 }
2344 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2345
2346 cr = bcd_cmp_zero(b);
2347
2348 if (ox_flag) {
2349 cr |= CRF_SO;
2350 }
2351
2352 if (unlikely(invalid)) {
2353 cr = CRF_SO;
2354 }
2355
2356 *r = ret;
2357
2358 return cr;
2359 }
2360
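/*
 * bcdcfz. - Decimal Convert From Zoned: one character byte per digit, with
 * the sign taken from the zone nibble of the least significant byte; PS
 * selects between the 0x3 (ASCII-style) and 0xF (EBCDIC-style) zone
 * encodings.
 */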
2361 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2362 {
2363 int i;
2364 int cr = 0;
2365 int invalid = 0;
2366 int zone_digit = 0;
2367 int zone_lead = ps ? 0xF : 0x3;
2368 int digit = 0;
2369 ppc_avr_t ret = { .u64 = { 0, 0 } };
2370 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2371
2372 if (unlikely((sgnb < 0xA) && ps)) {
2373 invalid = 1;
2374 }
2375
2376 for (i = 0; i < 16; i++) {
2377 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2378 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2379 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2380 invalid = 1;
2381 break;
2382 }
2383
2384 bcd_put_digit(&ret, digit, i + 1);
2385 }
2386
2387 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2388 (!ps && (sgnb & 0x4))) {
2389 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2390 } else {
2391 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2392 }
2393
2394 cr = bcd_cmp_zero(&ret);
2395
2396 if (unlikely(invalid)) {
2397 cr = CRF_SO;
2398 }
2399
2400 *r = ret;
2401
2402 return cr;
2403 }
2404
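/*
 * bcdctz. - Decimal Convert To Zoned.  SO is set when the source has more
 * than sixteen significant digits or is not valid BCD.
 */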
2405 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2406 {
2407 int i;
2408 int cr = 0;
2409 uint8_t digit = 0;
2410 int sgnb = bcd_get_sgn(b);
2411 int zone_lead = (ps) ? 0xF0 : 0x30;
2412 int invalid = (sgnb == 0);
2413 ppc_avr_t ret = { .u64 = { 0, 0 } };
2414
2415 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2416
2417 for (i = 0; i < 16; i++) {
2418 digit = bcd_get_digit(b, i + 1, &invalid);
2419
2420 if (unlikely(invalid)) {
2421 break;
2422 }
2423
2424 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2425 }
2426
2427 if (ps) {
2428 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2429 } else {
2430 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2431 }
2432
2433 cr = bcd_cmp_zero(b);
2434
2435 if (ox_flag) {
2436 cr |= CRF_SO;
2437 }
2438
2439 if (unlikely(invalid)) {
2440 cr = CRF_SO;
2441 }
2442
2443 *r = ret;
2444
2445 return cr;
2446 }
2447
2448 /**
2449 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2450 *
2451 * Returns:
2452 * > 0 if ahi|alo > bhi|blo,
2453 * 0 if ahi|alo == bhi|blo,
2454 * < 0 if ahi|alo < bhi|blo
2455 */
2456 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2457 uint64_t blo, uint64_t bhi)
2458 {
2459 return (ahi == bhi) ?
2460 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2461 (ahi > bhi ? 1 : -1);
2462 }
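/*
 * For instance, ucmp128(0, 1, 0xffffffffffffffffULL, 0) returns 1: the
 * high halves differ, so 2^64 compares greater than 2^64 - 1 regardless of
 * the low halves.
 */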
2463
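/*
 * bcdcfsq. - Decimal Convert From Signed Quadword: convert a signed
 * 128-bit binary integer to signed BCD.  SO is set (and r left unchanged)
 * when the magnitude exceeds 10^31 - 1.
 */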
2464 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2465 {
2466 int i;
2467 int cr;
2468 uint64_t lo_value;
2469 uint64_t hi_value;
2470 uint64_t rem;
2471 ppc_avr_t ret = { .u64 = { 0, 0 } };
2472
2473 if (b->VsrSD(0) < 0) {
2474 lo_value = -b->VsrSD(1);
2475 hi_value = ~b->VsrD(0) + !lo_value;
2476 bcd_put_digit(&ret, 0xD, 0);
2477
2478 cr = CRF_LT;
2479 } else {
2480 lo_value = b->VsrD(1);
2481 hi_value = b->VsrD(0);
2482 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2483
2484 if (hi_value == 0 && lo_value == 0) {
2485 cr = CRF_EQ;
2486 } else {
2487 cr = CRF_GT;
2488 }
2489 }
2490
2491 /*
2492 * Check src limits: abs(src) <= 10^31 - 1
2493 *
2494 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2495 */
2496 if (ucmp128(lo_value, hi_value,
2497 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2498 cr |= CRF_SO;
2499
2500 /*
2501 * According to the ISA, if src wouldn't fit in the destination
2502 * register, the result is undefined.
2503 * In that case, we leave r unchanged.
2504 */
2505 } else {
2506 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2507
2508 for (i = 1; i < 16; rem /= 10, i++) {
2509 bcd_put_digit(&ret, rem % 10, i);
2510 }
2511
2512 for (; i < 32; lo_value /= 10, i++) {
2513 bcd_put_digit(&ret, lo_value % 10, i);
2514 }
2515
2516 *r = ret;
2517 }
2518
2519 return cr;
2520 }
2521
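/*
 * bcdctsq. - Decimal Convert To Signed Quadword: convert signed BCD to a
 * signed 128-bit binary integer, accumulating digits by repeated
 * multiplication by ten; SO on invalid input.
 */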
2522 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2523 {
2524 uint8_t i;
2525 int cr;
2526 uint64_t carry;
2527 uint64_t unused;
2528 uint64_t lo_value;
2529 uint64_t hi_value = 0;
2530 int sgnb = bcd_get_sgn(b);
2531 int invalid = (sgnb == 0);
2532
2533 lo_value = bcd_get_digit(b, 31, &invalid);
2534 for (i = 30; i > 0; i--) {
2535 mulu64(&lo_value, &carry, lo_value, 10ULL);
2536 mulu64(&hi_value, &unused, hi_value, 10ULL);
2537 lo_value += bcd_get_digit(b, i, &invalid);
2538 hi_value += carry;
2539
2540 if (unlikely(invalid)) {
2541 break;
2542 }
2543 }
2544
2545 if (sgnb == -1) {
2546 r->VsrSD(1) = -lo_value;
2547 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2548 } else {
2549 r->VsrSD(1) = lo_value;
2550 r->VsrSD(0) = hi_value;
2551 }
2552
2553 cr = bcd_cmp_zero(b);
2554
2555 if (unlikely(invalid)) {
2556 cr = CRF_SO;
2557 }
2558
2559 return cr;
2560 }
2561
2562 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2563 {
2564 int i;
2565 int invalid = 0;
2566
2567 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2568 return CRF_SO;
2569 }
2570
2571 *r = *a;
2572 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2573
2574 for (i = 1; i < 32; i++) {
2575 bcd_get_digit(a, i, &invalid);
2576 bcd_get_digit(b, i, &invalid);
2577 if (unlikely(invalid)) {
2578 return CRF_SO;
2579 }
2580 }
2581
2582 return bcd_cmp_zero(r);
2583 }
2584
2585 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2586 {
2587 int sgnb = bcd_get_sgn(b);
2588
2589 *r = *b;
2590 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2591
2592 if (bcd_is_valid(b) == false) {
2593 return CRF_SO;
2594 }
2595
2596 return bcd_cmp_zero(r);
2597 }
2598
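/*
 * bcds. - Decimal Shift.  The signed count from VRA (clamped to +/-31)
 * shifts the digits left for positive values and right for negative ones;
 * the sign nibble is re-inserted in its preferred encoding and SO reports
 * digits shifted out on the left.
 */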
2599 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2600 {
2601 int cr;
2602 int i = a->VsrSB(7);
2603 bool ox_flag = false;
2604 int sgnb = bcd_get_sgn(b);
2605 ppc_avr_t ret = *b;
2606 ret.VsrD(1) &= ~0xf;
2607
2608 if (bcd_is_valid(b) == false) {
2609 return CRF_SO;
2610 }
2611
2612 if (unlikely(i > 31)) {
2613 i = 31;
2614 } else if (unlikely(i < -31)) {
2615 i = -31;
2616 }
2617
2618 if (i > 0) {
2619 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2620 } else {
2621 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2622 }
2623 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2624
2625 *r = ret;
2626
2627 cr = bcd_cmp_zero(r);
2628 if (ox_flag) {
2629 cr |= CRF_SO;
2630 }
2631
2632 return cr;
2633 }
2634
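/*
 * bcdus. - Decimal Unsigned Shift: like bcds. but every nibble is a digit,
 * so no sign nibble is preserved or re-inserted.
 */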
2635 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2636 {
2637 int cr;
2638 int i;
2639 int invalid = 0;
2640 bool ox_flag = false;
2641 ppc_avr_t ret = *b;
2642
2643 for (i = 0; i < 32; i++) {
2644 bcd_get_digit(b, i, &invalid);
2645
2646 if (unlikely(invalid)) {
2647 return CRF_SO;
2648 }
2649 }
2650
2651 i = a->VsrSB(7);
2652 if (i >= 32) {
2653 ox_flag = true;
2654 ret.VsrD(1) = ret.VsrD(0) = 0;
2655 } else if (i <= -32) {
2656 ret.VsrD(1) = ret.VsrD(0) = 0;
2657 } else if (i > 0) {
2658 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2659 } else {
2660 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2661 }
2662 *r = ret;
2663
2664 cr = bcd_cmp_zero(r);
2665 if (ox_flag) {
2666 cr |= CRF_SO;
2667 }
2668
2669 return cr;
2670 }
2671
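/*
 * bcdsr. - Decimal Shift and Round.  Right shifts round the magnitude up
 * (away from zero) when the most significant digit shifted out is 5 or
 * greater; left shifts behave as in bcds.
 */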
2672 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2673 {
2674 int cr;
2675 int unused = 0;
2676 int invalid = 0;
2677 bool ox_flag = false;
2678 int sgnb = bcd_get_sgn(b);
2679 ppc_avr_t ret = *b;
2680 ret.VsrD(1) &= ~0xf;
2681
2682 int i = a->VsrSB(7);
2683 ppc_avr_t bcd_one;
2684
2685 bcd_one.VsrD(0) = 0;
2686 bcd_one.VsrD(1) = 0x10;
2687
2688 if (bcd_is_valid(b) == false) {
2689 return CRF_SO;
2690 }
2691
2692 if (unlikely(i > 31)) {
2693 i = 31;
2694 } else if (unlikely(i < -31)) {
2695 i = -31;
2696 }
2697
2698 if (i > 0) {
2699 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2700 } else {
2701 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2702
2703 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2704 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2705 }
2706 }
2707 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2708
2709 cr = bcd_cmp_zero(&ret);
2710 if (ox_flag) {
2711 cr |= CRF_SO;
2712 }
2713 *r = ret;
2714
2715 return cr;
2716 }
2717
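/*
 * bcdtrunc. - Decimal Truncate: keep only the low-order number of digits
 * given by the halfword count in VRA, setting SO when non-zero digits are
 * discarded.
 */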
2718 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2719 {
2720 uint64_t mask;
2721 uint32_t ox_flag = 0;
2722 int i = a->VsrSH(3) + 1;
2723 ppc_avr_t ret = *b;
2724
2725 if (bcd_is_valid(b) == false) {
2726 return CRF_SO;
2727 }
2728
2729 if (i > 16 && i < 32) {
2730 mask = (uint64_t)-1 >> (128 - i * 4);
2731 if (ret.VsrD(0) & ~mask) {
2732 ox_flag = CRF_SO;
2733 }
2734
2735 ret.VsrD(0) &= mask;
2736 } else if (i >= 0 && i <= 16) {
2737 mask = (uint64_t)-1 >> (64 - i * 4);
2738 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2739 ox_flag = CRF_SO;
2740 }
2741
2742 ret.VsrD(1) &= mask;
2743 ret.VsrD(0) = 0;
2744 }
2745 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2746 *r = ret;
2747
2748 return bcd_cmp_zero(&ret) | ox_flag;
2749 }
2750
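/*
 * bcdutrunc. - Decimal Unsigned Truncate: as bcdtrunc. but for unsigned
 * BCD, so all 32 nibbles are digits.
 */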
2751 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2752 {
2753 int i;
2754 uint64_t mask;
2755 uint32_t ox_flag = 0;
2756 int invalid = 0;
2757 ppc_avr_t ret = *b;
2758
2759 for (i = 0; i < 32; i++) {
2760 bcd_get_digit(b, i, &invalid);
2761
2762 if (unlikely(invalid)) {
2763 return CRF_SO;
2764 }
2765 }
2766
2767 i = a->VsrSH(3);
2768 if (i > 16 && i < 33) {
2769 mask = (uint64_t)-1 >> (128 - i * 4);
2770 if (ret.VsrD(0) & ~mask) {
2771 ox_flag = CRF_SO;
2772 }
2773
2774 ret.VsrD(0) &= mask;
2775 } else if (i > 0 && i <= 16) {
2776 mask = (uint64_t)-1 >> (64 - i * 4);
2777 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2778 ox_flag = CRF_SO;
2779 }
2780
2781 ret.VsrD(1) &= mask;
2782 ret.VsrD(0) = 0;
2783 } else if (i == 0) {
2784 if (ret.VsrD(0) || ret.VsrD(1)) {
2785 ox_flag = CRF_SO;
2786 }
2787 ret.VsrD(0) = ret.VsrD(1) = 0;
2788 }
2789
2790 *r = ret;
2791 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2792 return ox_flag | CRF_EQ;
2793 }
2794
2795 return ox_flag | CRF_GT;
2796 }
2797
2798 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2799 {
2800 int i;
2801 VECTOR_FOR_INORDER_I(i, u8) {
2802 r->u8[i] = AES_sbox[a->u8[i]];
2803 }
2804 }
2805
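/*
 * vcipher - one full AES encryption round: SubBytes, ShiftRows and
 * MixColumns are folded into the Te lookup tables, then the round key in
 * VRB is XORed in.
 */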
2806 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2807 {
2808 ppc_avr_t result;
2809 int i;
2810
2811 VECTOR_FOR_INORDER_I(i, u32) {
2812 result.VsrW(i) = b->VsrW(i) ^
2813 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2814 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2815 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2816 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2817 }
2818 *r = result;
2819 }
2820
2821 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2822 {
2823 ppc_avr_t result;
2824 int i;
2825
2826 VECTOR_FOR_INORDER_I(i, u8) {
2827 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2828 }
2829 *r = result;
2830 }
2831
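/*
 * vncipher - one AES inverse-cipher round: InvShiftRows and InvSubBytes
 * via the inverse S-box, XOR with the round key in VRB, then InvMixColumns
 * through the AES_imc table.
 */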
2832 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2833 {
2834 /*
 * This differs from what is written in ISA V2.07. The RTL is
 * incorrect and will be fixed in V2.07B.
 */
2836 int i;
2837 ppc_avr_t tmp;
2838
2839 VECTOR_FOR_INORDER_I(i, u8) {
2840 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2841 }
2842
2843 VECTOR_FOR_INORDER_I(i, u32) {
2844 r->VsrW(i) =
2845 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2846 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2847 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2848 AES_imc[tmp.VsrB(4 * i + 3)][3];
2849 }
2850 }
2851
2852 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2853 {
2854 ppc_avr_t result;
2855 int i;
2856
2857 VECTOR_FOR_INORDER_I(i, u8) {
2858 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2859 }
2860 *r = result;
2861 }
2862
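/*
 * vshasigmaw - SHA-256 sigma functions.  Bit 4 of st_six (the ST field)
 * selects the upper-case Sigma variants; each bit of the low four (the SIX
 * field) selects sigma1/Sigma1 instead of sigma0/Sigma0 for the matching
 * word.  For instance, st_six = 0x1F computes Sigma1 of every word.
 */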
2863 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2864 {
2865 int st = (st_six & 0x10) != 0;
2866 int six = st_six & 0xF;
2867 int i;
2868
2869 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2870 if (st == 0) {
2871 if ((six & (0x8 >> i)) == 0) {
2872 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2873 ror32(a->VsrW(i), 18) ^
2874 (a->VsrW(i) >> 3);
2875 } else { /* six.bit[i] == 1 */
2876 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2877 ror32(a->VsrW(i), 19) ^
2878 (a->VsrW(i) >> 10);
2879 }
2880 } else { /* st == 1 */
2881 if ((six & (0x8 >> i)) == 0) {
2882 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2883 ror32(a->VsrW(i), 13) ^
2884 ror32(a->VsrW(i), 22);
2885 } else { /* six.bit[i] == 1 */
2886 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2887 ror32(a->VsrW(i), 11) ^
2888 ror32(a->VsrW(i), 25);
2889 }
2890 }
2891 }
2892 }
2893
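/*
 * vshasigmad - SHA-512 sigma functions, selected from st_six as in
 * vshasigmaw but operating on the two doubleword elements.
 */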
2894 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2895 {
2896 int st = (st_six & 0x10) != 0;
2897 int six = st_six & 0xF;
2898 int i;
2899
2900 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2901 if (st == 0) {
2902 if ((six & (0x8 >> (2 * i))) == 0) {
2903 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2904 ror64(a->VsrD(i), 8) ^
2905 (a->VsrD(i) >> 7);
2906 } else { /* six.bit[2*i] == 1 */
2907 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2908 ror64(a->VsrD(i), 61) ^
2909 (a->VsrD(i) >> 6);
2910 }
2911 } else { /* st == 1 */
2912 if ((six & (0x8 >> (2 * i))) == 0) {
2913 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2914 ror64(a->VsrD(i), 34) ^
2915 ror64(a->VsrD(i), 39);
2916 } else { /* six.bit[2*i] == 1 */
2917 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2918 ror64(a->VsrD(i), 18) ^
2919 ror64(a->VsrD(i), 41);
2920 }
2921 }
2922 }
2923 }
2924
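/*
 * vpermxor - for each result byte, the high and low nibbles of the
 * corresponding VRC byte select one byte from VRA and one from VRB, which
 * are XORed together.
 */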
2925 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2926 {
2927 ppc_avr_t result;
2928 int i;
2929
2930 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2931 int indexA = c->VsrB(i) >> 4;
2932 int indexB = c->VsrB(i) & 0xF;
2933
2934 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2935 }
2936 *r = result;
2937 }
2938
2939 #undef VECTOR_FOR_INORDER_I
2940
2941 /*****************************************************************************/
2942 /* SPE extension helpers */
2943 /* Use a nibble lookup table to make bit reversal quicker */
2944 static const uint8_t hbrev[16] = {
2945 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2946 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2947 };
2948
2949 static inline uint8_t byte_reverse(uint8_t val)
2950 {
2951 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2952 }
2953
2954 static inline uint32_t word_reverse(uint32_t val)
2955 {
2956 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2957 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2958 }
2959
2960 #define MASKBITS 16 /* Arbitrary choice; the real width is implementation dependent */
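/*
 * brinc - bit-reversed increment: adds one to the bit-reversed low-order
 * bits of arg1 under the mask supplied in arg2, producing the next index
 * of a bit-reversed (FFT-style) addressing sequence.
 */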
2961 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2962 {
2963 uint32_t a, b, d, mask;
2964
2965 mask = UINT32_MAX >> (32 - MASKBITS);
2966 a = arg1 & mask;
2967 b = arg2 & mask;
2968 d = word_reverse(1 + word_reverse(a | ~b));
2969 return (arg1 & ~mask) | (d & b);
2970 }
2971
2972 uint32_t helper_cntlsw32(uint32_t val)
2973 {
2974 if (val & 0x80000000) {
2975 return clz32(~val);
2976 } else {
2977 return clz32(val);
2978 }
2979 }
2980
2981 uint32_t helper_cntlzw32(uint32_t val)
2982 {
2983 return clz32(val);
2984 }
2985
2986 /* 440 specific */
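/*
 * dlmzb - determine leftmost zero byte: scan the eight bytes of high:low
 * from the most significant byte down, write the 1-based index of the
 * first zero byte (or 8 if there is none) to the low bits of XER and
 * return it; when update_Rc is set, CR0 records whether the zero byte was
 * found in the high word, the low word, or not at all.
 */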
2987 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2988 target_ulong low, uint32_t update_Rc)
2989 {
2990 target_ulong mask;
2991 int i;
2992
2993 i = 1;
2994 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2995 if ((high & mask) == 0) {
2996 if (update_Rc) {
2997 env->crf[0] = 0x4;
2998 }
2999 goto done;
3000 }
3001 i++;
3002 }
3003 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3004 if ((low & mask) == 0) {
3005 if (update_Rc) {
3006 env->crf[0] = 0x8;
3007 }
3008 goto done;
3009 }
3010 i++;
3011 }
3012 i = 8;
3013 if (update_Rc) {
3014 env->crf[0] = 0x2;
3015 }
3016 done:
3017 env->xer = (env->xer & ~0x7F) | i;
3018 if (update_Rc) {
3019 env->crf[0] |= xer_so;
3020 }
3021 return i;
3022 }