1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
32
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
36
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 {
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
43 }
44 }
45
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48 {
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
68 }
69
70 return (target_ulong)rt;
71 }
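/*
 * Illustrative example (values chosen here, not from the ISA): divweu
 * computes ((uint64_t)RA << 32) / RB truncated to 32 bits, so
 * helper_divweu(env, 1, 4, 0) divides 0x1_0000_0000 by 4 and returns
 * 0x40000000, while helper_divweu(env, 1, 1, 0) would need the value
 * 0x1_0000_0000, which does not fit in 32 bits, so it is treated as an
 * overflow and the (architecturally undefined) result is forced to 0.
 */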
72
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75 {
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
96 }
97
98 return (target_ulong)rt;
99 }
100
101 #if defined(TARGET_PPC64)
102
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 {
105 uint64_t rt = 0;
106 int overflow = 0;
107
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
113 }
114
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
117 }
118
119 return rt;
120 }
121
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 {
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
128
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
134 }
135
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
138 }
139
140 return rt;
141 }
142
143 #endif
144
145
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
150 /*
151  * Subtract 1 from each byte, AND with the inverse of the original
152  * value, and check whether the MSB of each byte is set.
153  * e.g. ((0x00 - 0x01) & ~(0x00)) & 0x80
154  *      = (0xFF & 0xFF) & 0x80 = 0x80 (zero byte found)
155 */
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 {
163 return hasvalue(rb, ra) ? CRF_GT : 0;
164 }
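/*
 * Illustrative example (hand-picked values): cmpeqb reports whether any
 * byte of rb equals the low byte of ra, e.g.
 *
 *   helper_cmpeqb(0x55, 0x1122334455667788) == CRF_GT  (byte 0x55 present)
 *   helper_cmpeqb(0x99, 0x1122334455667788) == 0       (no byte is 0x99)
 *
 * because rb ^ pattern(0x55) has a zero byte exactly where rb contains
 * 0x55, which haszero() then detects.
 */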
165
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
169
170 /*
171 * Return a random number.
172 */
173 uint64_t helper_darn32(void)
174 {
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
184
185 return ret;
186 }
187
188 uint64_t helper_darn64(void)
189 {
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
199
200 return ret;
201 }
202
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 {
205 int i;
206 uint64_t ra = 0;
207
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
213 }
214 }
215 }
216 return ra;
217 }
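/*
 * Illustrative example (hand-picked values): each byte of rs selects one
 * bit of rb using big-endian bit numbering (PPC_BIT(0) is the MSB,
 * PPC_BIT(63) the LSB), and indices >= 64 contribute 0. So with
 * rs = 0xFFFFFFFFFFFFFF3F, only byte 0 (0x3F = 63) selects a bit and
 * helper_bpermd(0xFFFFFFFFFFFFFF3F, rb) == (rb & 1).
 */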
218
219 #endif
220
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 {
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234 }
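/*
 * Illustrative example (64-bit target, hand-picked values): each byte of
 * the result is 0xFF where the corresponding bytes of rs and rb match:
 *
 *   helper_cmpb(0x1122334455667788, 0x1100330055007700)
 *       == 0xFF00FF00FF00FF00
 */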
235
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
239 {
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
250 }
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
254 }
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
258 }
259 return (target_long)ret;
260 }
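/*
 * Illustrative example (hand-picked values): shifting the negative word
 * 0xFFFFFFF5 (-11) right by 2 gives -3, and because non-zero bits (0b01)
 * were shifted out of a negative value, CA and CA32 are set to 1:
 *
 *   helper_sraw(env, 0xFFFFFFF5, 2) == (target_ulong)-3, env->ca == 1
 */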
261
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
265 {
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
276 }
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
280 }
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
284 }
285 return ret;
286 }
287 #endif
288
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
291 {
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300 }
301
302 target_ulong helper_popcntw(target_ulong val)
303 {
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316 }
317 #else
318 target_ulong helper_popcntb(target_ulong val)
319 {
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325 }
326 #endif
327
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
329 {
330 /*
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we'll handle it in
333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
334 * ctz or cto, we negate the mask at the end of the loop.
335 */
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
339
340 if (mask == 0 || mask == -1) {
341 return src;
342 }
343
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
350 }
351
352 /*
353  * Extract the 'n' trailing bits of src and put them in the leading 'n'
354  * bits of 'right' or 'left', pushing down the previously extracted
355  * values.
356 */
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
362 }
363
364 /*
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
369 */
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
375 }
376
377 /*
378  * At the end, 'right' has been ror'ed ctpop(mask) times in total. To put
379  * its bits back in place, we shift it right by another 64 - ctpop(mask) bits.
380 */
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
385 }
386
387 return left | (right >> n);
388 }
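/*
 * Worked example (hand-computed, illustrative values only): with
 * src = 0xB4 (1011 0100) and mask = 0xCA (1100 1010), the src bits under
 * the 1-bits of the mask (1,0,0,0 from MSB to LSB) are gathered into the
 * least significant positions, and the bits under the 0-bits
 * (..0,1,1,1,0) are gathered just above them:
 *
 *   helper_CFUGED(0xB4, 0xCA) == 0xE8  (1110 1000)
 */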
389
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 {
392 int i, o;
393 uint64_t result = 0;
394
395 if (mask == -1) {
396 return src;
397 }
398
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
403 }
404
405 return result;
406 }
407
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 {
410 int i, o;
411 uint64_t result = 0;
412
413 if (mask == -1) {
414 return src;
415 }
416
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
421 }
422
423 return result;
424 }
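/*
 * Illustrative example (hand-picked values): PDEPD scatters the low bits
 * of src into the positions of the 1-bits of mask, and PEXTD gathers them
 * back, so the two are inverses over the masked bits:
 *
 *   helper_PDEPD(0xAB, 0xF0F) == 0xA0B
 *   helper_PEXTD(0xA0B, 0xF0F) == 0xAB
 */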
425
426 /*****************************************************************************/
427 /* Altivec extension helpers */
428 #if HOST_BIG_ENDIAN
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
435
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
439 { \
440 to_type r; \
441 \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
450 } \
451 return r; \
452 }
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
455 { \
456 to_type r; \
457 \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
463 } \
464 return r; \
465 }
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
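/*
 * Illustrative example (hand-picked values): the generated converters clamp
 * to the target range and record saturation, e.g. cvtsdsw(0x100000000, &sat)
 * returns INT32_MAX and sets sat, and cvtshub(-1, &sat) returns 0 and sets
 * sat, while in-range inputs pass through unchanged and leave sat alone.
 */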
478
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 {
481 ppc_store_vscr(env, vscr);
482 }
483
484 uint32_t helper_mfvscr(CPUPPCState *env)
485 {
486 return ppc_get_vscr(env);
487 }
488
489 static inline void set_vscr_sat(CPUPPCState *env)
490 {
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
493 }
494
495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 {
497 int i;
498
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
501 }
502 }
503
504 /* vprtybw */
505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 {
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
512 }
513 }
514
515 /* vprtybd */
516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 {
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
524 }
525 }
526
527 /* vprtybq */
528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 {
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
536 }
537
538 #define VARITHFP(suffix, func) \
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
541 { \
542 int i; \
543 \
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
546 } \
547 }
548 VARITHFP(addfp, float32_add)
549 VARITHFP(subfp, float32_sub)
550 VARITHFP(minfp, float32_min)
551 VARITHFP(maxfp, float32_max)
552 #undef VARITHFP
553
554 #define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
557 { \
558 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
562 } \
563 }
564 VARITHFPFMA(maddfp, 0);
565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566 #undef VARITHFPFMA
567
568 #define VARITHSAT_CASE(type, op, cvt, element) \
569 { \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
572 }
573
574 #define VARITHSAT_DO(name, op, optype, cvt, element) \
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 { \
578 int sat = 0; \
579 int i; \
580 \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
582 VARITHSAT_CASE(optype, op, cvt, element); \
583 } \
584 if (sat) { \
585 vscr_sat->u32[0] = 1; \
586 } \
587 }
588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600 #undef VARITHSAT_CASE
601 #undef VARITHSAT_DO
602 #undef VARITHSAT_SIGNED
603 #undef VARITHSAT_UNSIGNED
604
605 #define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 { \
608 int i; \
609 \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
613 } \
614 }
615
616 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620 VAVG(b, s8, int16_t, u8, uint16_t)
621 VAVG(h, s16, int32_t, u16, uint32_t)
622 VAVG(w, s32, int64_t, u32, uint64_t)
623 #undef VAVG_DO
624 #undef VAVG
625
626 #define VABSDU_DO(name, element) \
627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 int i; \
630 \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
635 } \
636 }
637
638 /*
639 * VABSDU - Vector absolute difference unsigned
640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
642 */
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645 VABSDU(b, u8)
646 VABSDU(h, u16)
647 VABSDU(w, u32)
648 #undef VABSDU_DO
649 #undef VABSDU
650
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
654 { \
655 int i; \
656 \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
660 } \
661 }
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
664 #undef VCF
665
666 #define VCMPNEZ(NAME, ELEM) \
667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 { \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
672 } \
673 }
674 VCMPNEZ(VCMPNEZB, u8)
675 VCMPNEZ(VCMPNEZH, u16)
676 VCMPNEZ(VCMPNEZW, u32)
677 #undef VCMPNEZ
678
679 #define VCMPFP_DO(suffix, compare, order, record) \
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
682 { \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
687 \
688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
689 uint32_t result; \
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
699 } \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
703 } \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
706 } \
707 }
708 #define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
711 VCMPFP(eqfp, ==, float_relation_equal)
712 VCMPFP(gefp, !=, float_relation_less)
713 VCMPFP(gtfp, ==, float_relation_greater)
714 #undef VCMPFP_DO
715 #undef VCMPFP
716
717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
719 {
720 int i;
721 int all_in = 0;
722
723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 all_in = 1;
729 } else {
730 float32 bneg = float32_chs(b->f32[i]);
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
735
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
738 }
739 }
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
742 }
743 }
744
745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 0);
748 }
749
750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
751 ppc_avr_t *b)
752 {
753 vcmpbfp_internal(env, r, a, b, 1);
754 }
755
756 #define VCT(suffix, satcvt, element) \
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
759 { \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
763 \
764 set_float_rounding_mode(float_round_to_zero, &s); \
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
767 r->element[i] = 0; \
768 } else { \
769 float64 t = float32_to_float64(b->f32[i], &s); \
770 int64_t j; \
771 \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
775 } \
776 } \
777 if (sat) { \
778 set_vscr_sat(env); \
779 } \
780 }
781 VCT(uxs, cvtsduw, u32)
782 VCT(sxs, cvtsdsw, s32)
783 #undef VCT
784
785 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
786
787 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
788 {
789 int64_t psum = 0;
790 for (int i = 0; i < 8; i++, mask >>= 1) {
791 if (mask & 1) {
792 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
793 }
794 }
795 return psum;
796 }
797
798 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
799 {
800 int64_t psum = 0;
801 for (int i = 0; i < 4; i++, mask >>= 1) {
802 if (mask & 1) {
803 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
804 }
805 }
806 return psum;
807 }
808
809 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
810 {
811 int64_t psum = 0;
812 for (int i = 0; i < 2; i++, mask >>= 1) {
813 if (mask & 1) {
814 psum += (int64_t)sextract32(a, 16 * i, 16) *
815 sextract32(b, 16 * i, 16);
816 }
817 }
818 return psum;
819 }
820
821 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
822 uint32_t mask, bool sat, bool acc, do_ger ger)
823 {
824 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
825 xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
826 ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
827 uint8_t xmsk_bit, ymsk_bit;
828 int64_t psum;
829 int i, j;
830 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
831 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
832 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
833 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
834 if (acc) {
835 psum += at[i].VsrSW(j);
836 }
837 if (sat && psum > INT32_MAX) {
838 set_vscr_sat(env);
839 at[i].VsrSW(j) = INT32_MAX;
840 } else if (sat && psum < INT32_MIN) {
841 set_vscr_sat(env);
842 at[i].VsrSW(j) = INT32_MIN;
843 } else {
844 at[i].VsrSW(j) = (int32_t) psum;
845 }
846 } else {
847 at[i].VsrSW(j) = 0;
848 }
849 }
850 }
851 }
852
853 QEMU_FLATTEN
854 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
855 ppc_acc_t *at, uint32_t mask)
856 {
857 xviger(env, a, b, at, mask, false, false, ger_rank8);
858 }
859
860 QEMU_FLATTEN
861 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
862 ppc_acc_t *at, uint32_t mask)
863 {
864 xviger(env, a, b, at, mask, false, true, ger_rank8);
865 }
866
867 QEMU_FLATTEN
868 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
869 ppc_acc_t *at, uint32_t mask)
870 {
871 xviger(env, a, b, at, mask, false, false, ger_rank4);
872 }
873
874 QEMU_FLATTEN
875 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
876 ppc_acc_t *at, uint32_t mask)
877 {
878 xviger(env, a, b, at, mask, false, true, ger_rank4);
879 }
880
881 QEMU_FLATTEN
882 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
883 ppc_acc_t *at, uint32_t mask)
884 {
885 xviger(env, a, b, at, mask, true, true, ger_rank4);
886 }
887
888 QEMU_FLATTEN
889 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
890 ppc_acc_t *at, uint32_t mask)
891 {
892 xviger(env, a, b, at, mask, false, false, ger_rank2);
893 }
894
895 QEMU_FLATTEN
896 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
897 ppc_acc_t *at, uint32_t mask)
898 {
899 xviger(env, a, b, at, mask, true, false, ger_rank2);
900 }
901
902 QEMU_FLATTEN
903 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
904 ppc_acc_t *at, uint32_t mask)
905 {
906 xviger(env, a, b, at, mask, false, true, ger_rank2);
907 }
908
909 QEMU_FLATTEN
910 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
911 ppc_acc_t *at, uint32_t mask)
912 {
913 xviger(env, a, b, at, mask, true, true, ger_rank2);
914 }
915
916 target_ulong helper_vclzlsbb(ppc_avr_t *r)
917 {
918 target_ulong count = 0;
919 int i;
920 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
921 if (r->VsrB(i) & 0x01) {
922 break;
923 }
924 count++;
925 }
926 return count;
927 }
928
929 target_ulong helper_vctzlsbb(ppc_avr_t *r)
930 {
931 target_ulong count = 0;
932 int i;
933 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
934 if (r->VsrB(i) & 0x01) {
935 break;
936 }
937 count++;
938 }
939 return count;
940 }
941
942 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
943 ppc_avr_t *b, ppc_avr_t *c)
944 {
945 int sat = 0;
946 int i;
947
948 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
949 int32_t prod = a->s16[i] * b->s16[i];
950 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
951
952 r->s16[i] = cvtswsh(t, &sat);
953 }
954
955 if (sat) {
956 set_vscr_sat(env);
957 }
958 }
959
960 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
961 ppc_avr_t *b, ppc_avr_t *c)
962 {
963 int sat = 0;
964 int i;
965
966 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
967 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
968 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
969 r->s16[i] = cvtswsh(t, &sat);
970 }
971
972 if (sat) {
973 set_vscr_sat(env);
974 }
975 }
976
977 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
978 {
979 int i;
980
981 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
982 int32_t prod = a->s16[i] * b->s16[i];
983 r->s16[i] = (int16_t) (prod + c->s16[i]);
984 }
985 }
986
987 #define VMRG_DO(name, element, access, ofs) \
988 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
989 { \
990 ppc_avr_t result; \
991 int i, half = ARRAY_SIZE(r->element) / 2; \
992 \
993 for (i = 0; i < half; i++) { \
994 result.access(i * 2 + 0) = a->access(i + ofs); \
995 result.access(i * 2 + 1) = b->access(i + ofs); \
996 } \
997 *r = result; \
998 }
999
1000 #define VMRG(suffix, element, access) \
1001 VMRG_DO(mrgl##suffix, element, access, half) \
1002 VMRG_DO(mrgh##suffix, element, access, 0)
1003 VMRG(b, u8, VsrB)
1004 VMRG(h, u16, VsrH)
1005 VMRG(w, u32, VsrW)
1006 #undef VMRG_DO
1007 #undef VMRG
1008
1009 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1010 {
1011 int32_t prod[16];
1012 int i;
1013
1014 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1015 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1016 }
1017
1018 VECTOR_FOR_INORDER_I(i, s32) {
1019 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1020 prod[4 * i + 2] + prod[4 * i + 3];
1021 }
1022 }
1023
1024 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1025 {
1026 int32_t prod[8];
1027 int i;
1028
1029 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1030 prod[i] = a->s16[i] * b->s16[i];
1031 }
1032
1033 VECTOR_FOR_INORDER_I(i, s32) {
1034 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1035 }
1036 }
1037
1038 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1039 ppc_avr_t *b, ppc_avr_t *c)
1040 {
1041 int32_t prod[8];
1042 int i;
1043 int sat = 0;
1044
1045 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1046 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1047 }
1048
1049 VECTOR_FOR_INORDER_I(i, s32) {
1050 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1051
1052 r->u32[i] = cvtsdsw(t, &sat);
1053 }
1054
1055 if (sat) {
1056 set_vscr_sat(env);
1057 }
1058 }
1059
1060 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1061 {
1062 uint16_t prod[16];
1063 int i;
1064
1065 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1066 prod[i] = a->u8[i] * b->u8[i];
1067 }
1068
1069 VECTOR_FOR_INORDER_I(i, u32) {
1070 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1071 prod[4 * i + 2] + prod[4 * i + 3];
1072 }
1073 }
1074
1075 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1076 {
1077 uint32_t prod[8];
1078 int i;
1079
1080 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1081 prod[i] = a->u16[i] * b->u16[i];
1082 }
1083
1084 VECTOR_FOR_INORDER_I(i, u32) {
1085 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1086 }
1087 }
1088
1089 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1090 ppc_avr_t *b, ppc_avr_t *c)
1091 {
1092 uint32_t prod[8];
1093 int i;
1094 int sat = 0;
1095
1096 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1097 prod[i] = a->u16[i] * b->u16[i];
1098 }
1099
1100 VECTOR_FOR_INORDER_I(i, s32) {
1101 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1102
1103 r->u32[i] = cvtuduw(t, &sat);
1104 }
1105
1106 if (sat) {
1107 set_vscr_sat(env);
1108 }
1109 }
1110
1111 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1112 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1113 { \
1114 int i; \
1115 \
1116 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1117 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1118 (cast)b->mul_access(i); \
1119 } \
1120 }
1121
1122 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1123 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1124 { \
1125 int i; \
1126 \
1127 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1128 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1129 (cast)b->mul_access(i + 1); \
1130 } \
1131 }
1132
1133 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1134 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1135 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1136 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1137 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1138 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1139 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1140 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1141 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1142 #undef VMUL_DO_EVN
1143 #undef VMUL_DO_ODD
1144 #undef VMUL
1145
1146 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1147 target_ulong uim)
1148 {
1149 int i, idx;
1150 ppc_vsr_t tmp = { .u64 = {0, 0} };
1151
1152 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1153 if ((pcv->VsrB(i) >> 5) == uim) {
1154 idx = pcv->VsrB(i) & 0x1f;
1155 if (idx < ARRAY_SIZE(t->u8)) {
1156 tmp.VsrB(i) = s0->VsrB(idx);
1157 } else {
1158 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1159 }
1160 }
1161 }
1162
1163 *t = tmp;
1164 }
1165
1166 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1167 {
1168 Int128 neg1 = int128_makes64(-1);
1169 Int128 int128_min = int128_make128(0, INT64_MIN);
1170 if (likely(int128_nz(b->s128) &&
1171 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1172 t->s128 = int128_divs(a->s128, b->s128);
1173 } else {
1174 t->s128 = a->s128; /* Undefined behavior */
1175 }
1176 }
1177
1178 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1179 {
1180 if (int128_nz(b->s128)) {
1181 t->s128 = int128_divu(a->s128, b->s128);
1182 } else {
1183 t->s128 = a->s128; /* Undefined behavior */
1184 }
1185 }
1186
1187 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1188 {
1189 int i;
1190 int64_t high;
1191 uint64_t low;
1192 for (i = 0; i < 2; i++) {
1193 high = a->s64[i];
1194 low = 0;
1195 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1196 t->s64[i] = a->s64[i]; /* Undefined behavior */
1197 } else {
1198 divs128(&low, &high, b->s64[i]);
1199 t->s64[i] = low;
1200 }
1201 }
1202 }
1203
1204 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1205 {
1206 int i;
1207 uint64_t high, low;
1208 for (i = 0; i < 2; i++) {
1209 high = a->u64[i];
1210 low = 0;
1211 if (unlikely(!b->u64[i])) {
1212 t->u64[i] = a->u64[i]; /* Undefined behavior */
1213 } else {
1214 divu128(&low, &high, b->u64[i]);
1215 t->u64[i] = low;
1216 }
1217 }
1218 }
1219
1220 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1221 {
1222 Int128 high, low;
1223 Int128 int128_min = int128_make128(0, INT64_MIN);
1224 Int128 neg1 = int128_makes64(-1);
1225
1226 high = a->s128;
1227 low = int128_zero();
1228 if (unlikely(!int128_nz(b->s128) ||
1229 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1230 t->s128 = a->s128; /* Undefined behavior */
1231 } else {
1232 divs256(&low, &high, b->s128);
1233 t->s128 = low;
1234 }
1235 }
1236
1237 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1238 {
1239 Int128 high, low;
1240
1241 high = a->s128;
1242 low = int128_zero();
1243 if (unlikely(!int128_nz(b->s128))) {
1244 t->s128 = a->s128; /* Undefined behavior */
1245 } else {
1246 divu256(&low, &high, b->s128);
1247 t->s128 = low;
1248 }
1249 }
1250
1251 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1252 {
1253 Int128 neg1 = int128_makes64(-1);
1254 Int128 int128_min = int128_make128(0, INT64_MIN);
1255 if (likely(int128_nz(b->s128) &&
1256 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1257 t->s128 = int128_rems(a->s128, b->s128);
1258 } else {
1259 t->s128 = int128_zero(); /* Undefined behavior */
1260 }
1261 }
1262
1263 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1264 {
1265 if (likely(int128_nz(b->s128))) {
1266 t->s128 = int128_remu(a->s128, b->s128);
1267 } else {
1268 t->s128 = int128_zero(); /* Undefined behavior */
1269 }
1270 }
1271
1272 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1273 {
1274 ppc_avr_t result;
1275 int i;
1276
1277 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1278 int s = c->VsrB(i) & 0x1f;
1279 int index = s & 0xf;
1280
1281 if (s & 0x10) {
1282 result.VsrB(i) = b->VsrB(index);
1283 } else {
1284 result.VsrB(i) = a->VsrB(index);
1285 }
1286 }
1287 *r = result;
1288 }
1289
1290 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1291 {
1292 ppc_avr_t result;
1293 int i;
1294
1295 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1296 int s = c->VsrB(i) & 0x1f;
1297 int index = 15 - (s & 0xf);
1298
1299 if (s & 0x10) {
1300 result.VsrB(i) = a->VsrB(index);
1301 } else {
1302 result.VsrB(i) = b->VsrB(index);
1303 }
1304 }
1305 *r = result;
1306 }
1307
1308 #define XXGENPCV_BE_EXP(NAME, SZ) \
1309 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1310 { \
1311 ppc_vsr_t tmp; \
1312 \
1313 /* Initialize tmp with the result of an all-zeros mask */ \
1314 tmp.VsrD(0) = 0x1011121314151617; \
1315 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1316 \
1317 /* Iterate over the most significant byte of each element */ \
1318 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1319 if (b->VsrB(i) & 0x80) { \
1320 /* Update each byte of the element */ \
1321 for (int k = 0; k < SZ; k++) { \
1322 tmp.VsrB(i + k) = j + k; \
1323 } \
1324 j += SZ; \
1325 } \
1326 } \
1327 \
1328 *t = tmp; \
1329 }
1330
1331 #define XXGENPCV_BE_COMP(NAME, SZ) \
1332 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1333 { \
1334 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1335 \
1336 /* Iterate over the most significant byte of each element */ \
1337 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1338 if (b->VsrB(i) & 0x80) { \
1339 /* Update each byte of the element */ \
1340 for (int k = 0; k < SZ; k++) { \
1341 tmp.VsrB(j + k) = i + k; \
1342 } \
1343 j += SZ; \
1344 } \
1345 } \
1346 \
1347 *t = tmp; \
1348 }
1349
1350 #define XXGENPCV_LE_EXP(NAME, SZ) \
1351 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1352 { \
1353 ppc_vsr_t tmp; \
1354 \
1355 /* Initialize tmp with the result of an all-zeros mask */ \
1356 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1357 tmp.VsrD(1) = 0x1716151413121110; \
1358 \
1359 /* Iterate over the most significant byte of each element */ \
1360 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1361 /* Reverse indexing of "i" */ \
1362 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1363 if (b->VsrB(idx) & 0x80) { \
1364 /* Update each byte of the element */ \
1365 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1366 tmp.VsrB(idx + rk) = j + k; \
1367 } \
1368 j += SZ; \
1369 } \
1370 } \
1371 \
1372 *t = tmp; \
1373 }
1374
1375 #define XXGENPCV_LE_COMP(NAME, SZ) \
1376 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1377 { \
1378 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1379 \
1380 /* Iterate over the most significant byte of each element */ \
1381 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1382 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1383 /* Update each byte of the element */ \
1384 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1385 /* Reverse indexing of "j" */ \
1386 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1387 tmp.VsrB(idx + rk) = i + k; \
1388 } \
1389 j += SZ; \
1390 } \
1391 } \
1392 \
1393 *t = tmp; \
1394 }
1395
1396 #define XXGENPCV(NAME, SZ) \
1397 XXGENPCV_BE_EXP(NAME, SZ) \
1398 XXGENPCV_BE_COMP(NAME, SZ) \
1399 XXGENPCV_LE_EXP(NAME, SZ) \
1400 XXGENPCV_LE_COMP(NAME, SZ) \
1401
1402 XXGENPCV(XXGENPCVBM, 1)
1403 XXGENPCV(XXGENPCVHM, 2)
1404 XXGENPCV(XXGENPCVWM, 4)
1405 XXGENPCV(XXGENPCVDM, 8)
1406
1407 #undef XXGENPCV_BE_EXP
1408 #undef XXGENPCV_BE_COMP
1409 #undef XXGENPCV_LE_EXP
1410 #undef XXGENPCV_LE_COMP
1411 #undef XXGENPCV
1412
1413 #if HOST_BIG_ENDIAN
1414 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1415 #define VBPERMD_INDEX(i) (i)
1416 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1417 #else
1418 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1419 #define VBPERMD_INDEX(i) (1 - i)
1420 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1421 #endif
1422 #define EXTRACT_BIT(avr, i, index) \
1423 (extract64((avr)->VsrD(i), 63 - index, 1))
1424
1425 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1426 {
1427 int i, j;
1428 ppc_avr_t result = { .u64 = { 0, 0 } };
1429 VECTOR_FOR_INORDER_I(i, u64) {
1430 for (j = 0; j < 8; j++) {
1431 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1432 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1433 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1434 }
1435 }
1436 }
1437 *r = result;
1438 }
1439
1440 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1441 {
1442 int i;
1443 uint64_t perm = 0;
1444
1445 VECTOR_FOR_INORDER_I(i, u8) {
1446 int index = VBPERMQ_INDEX(b, i);
1447
1448 if (index < 128) {
1449 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1450 if (a->u64[VBPERMQ_DW(index)] & mask) {
1451 perm |= (0x8000 >> i);
1452 }
1453 }
1454 }
1455
1456 r->VsrD(0) = perm;
1457 r->VsrD(1) = 0;
1458 }
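/*
 * Illustrative example (hand-picked values): each of the 16 bytes of b is a
 * bit index (big-endian numbering) into a. If every byte of b is 0, all 16
 * lookups select bit 0 of a (its architectural MSB), so perm becomes either
 * 0xFFFF or 0x0000 depending on that single bit, and is placed in the low
 * half of VsrD(0) (bits 48:63 of the result).
 */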
1459
1460 #undef VBPERMQ_INDEX
1461 #undef VBPERMQ_DW
1462
1463 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1464 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1465 { \
1466 int i, j; \
1467 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1468 \
1469 VECTOR_FOR_INORDER_I(i, srcfld) { \
1470 prod[i] = 0; \
1471 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1472 if (a->srcfld[i] & (1ull << j)) { \
1473 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1474 } \
1475 } \
1476 } \
1477 \
1478 VECTOR_FOR_INORDER_I(i, trgfld) { \
1479 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1480 } \
1481 }
1482
1483 PMSUM(vpmsumb, u8, u16, uint16_t)
1484 PMSUM(vpmsumh, u16, u32, uint32_t)
1485 PMSUM(vpmsumw, u32, u64, uint64_t)
1486
1487 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1488 {
1489 int i, j;
1490 Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
1491
1492 for (j = 0; j < 64; j++) {
1493 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1494 if (a->VsrD(i) & (1ull << j)) {
1495 tmp = int128_make64(b->VsrD(i));
1496 tmp = int128_lshift(tmp, j);
1497 prod[i] = int128_xor(prod[i], tmp);
1498 }
1499 }
1500 }
1501
1502 r->s128 = int128_xor(prod[0], prod[1]);
1503 }
1504
1505 #if HOST_BIG_ENDIAN
1506 #define PKBIG 1
1507 #else
1508 #define PKBIG 0
1509 #endif
1510 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1511 {
1512 int i, j;
1513 ppc_avr_t result;
1514 #if HOST_BIG_ENDIAN
1515 const ppc_avr_t *x[2] = { a, b };
1516 #else
1517 const ppc_avr_t *x[2] = { b, a };
1518 #endif
1519
1520 VECTOR_FOR_INORDER_I(i, u64) {
1521 VECTOR_FOR_INORDER_I(j, u32) {
1522 uint32_t e = x[i]->u32[j];
1523
1524 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1525 ((e >> 6) & 0x3e0) |
1526 ((e >> 3) & 0x1f));
1527 }
1528 }
1529 *r = result;
1530 }
1531
1532 #define VPK(suffix, from, to, cvt, dosat) \
1533 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1534 ppc_avr_t *a, ppc_avr_t *b) \
1535 { \
1536 int i; \
1537 int sat = 0; \
1538 ppc_avr_t result; \
1539 ppc_avr_t *a0 = PKBIG ? a : b; \
1540 ppc_avr_t *a1 = PKBIG ? b : a; \
1541 \
1542 VECTOR_FOR_INORDER_I(i, from) { \
1543 result.to[i] = cvt(a0->from[i], &sat); \
1544 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1545 } \
1546 *r = result; \
1547 if (dosat && sat) { \
1548 set_vscr_sat(env); \
1549 } \
1550 }
1551 #define I(x, y) (x)
1552 VPK(shss, s16, s8, cvtshsb, 1)
1553 VPK(shus, s16, u8, cvtshub, 1)
1554 VPK(swss, s32, s16, cvtswsh, 1)
1555 VPK(swus, s32, u16, cvtswuh, 1)
1556 VPK(sdss, s64, s32, cvtsdsw, 1)
1557 VPK(sdus, s64, u32, cvtsduw, 1)
1558 VPK(uhus, u16, u8, cvtuhub, 1)
1559 VPK(uwus, u32, u16, cvtuwuh, 1)
1560 VPK(udus, u64, u32, cvtuduw, 1)
1561 VPK(uhum, u16, u8, I, 0)
1562 VPK(uwum, u32, u16, I, 0)
1563 VPK(udum, u64, u32, I, 0)
1564 #undef I
1565 #undef VPK
1566 #undef PKBIG
1567
1568 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1569 {
1570 int i;
1571
1572 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1573 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1574 }
1575 }
1576
1577 #define VRFI(suffix, rounding) \
1578 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1579 ppc_avr_t *b) \
1580 { \
1581 int i; \
1582 float_status s = env->vec_status; \
1583 \
1584 set_float_rounding_mode(rounding, &s); \
1585 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1586 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1587 } \
1588 }
1589 VRFI(n, float_round_nearest_even)
1590 VRFI(m, float_round_down)
1591 VRFI(p, float_round_up)
1592 VRFI(z, float_round_to_zero)
1593 #undef VRFI
1594
1595 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1596 {
1597 int i;
1598
1599 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1600 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1601
1602 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1603 }
1604 }
1605
1606 #define VRLMI(name, size, element, insert) \
1607 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1608 { \
1609 int i; \
1610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1611 uint##size##_t src1 = a->element[i]; \
1612 uint##size##_t src2 = b->element[i]; \
1613 uint##size##_t src3 = r->element[i]; \
1614 uint##size##_t begin, end, shift, mask, rot_val; \
1615 \
1616 shift = extract##size(src2, 0, 6); \
1617 end = extract##size(src2, 8, 6); \
1618 begin = extract##size(src2, 16, 6); \
1619 rot_val = rol##size(src1, shift); \
1620 mask = mask_u##size(begin, end); \
1621 if (insert) { \
1622 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1623 } else { \
1624 r->element[i] = (rot_val & mask); \
1625 } \
1626 } \
1627 }
1628
1629 VRLMI(VRLDMI, 64, u64, 1);
1630 VRLMI(VRLWMI, 32, u32, 1);
1631 VRLMI(VRLDNM, 64, u64, 0);
1632 VRLMI(VRLWNM, 32, u32, 0);
1633
1634 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1635 {
1636 int i;
1637
1638 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1639 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1640 }
1641 }
1642
1643 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1644 {
1645 int i;
1646
1647 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1648 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1649 }
1650 }
1651
1652 #define VEXTU_X_DO(name, size, left) \
1653 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1654 { \
1655 int index = (a & 0xf) * 8; \
1656 if (left) { \
1657 index = 128 - index - size; \
1658 } \
1659 return int128_getlo(int128_rshift(b->s128, index)) & \
1660 MAKE_64BIT_MASK(0, size); \
1661 }
1662 VEXTU_X_DO(vextublx, 8, 1)
1663 VEXTU_X_DO(vextuhlx, 16, 1)
1664 VEXTU_X_DO(vextuwlx, 32, 1)
1665 VEXTU_X_DO(vextubrx, 8, 0)
1666 VEXTU_X_DO(vextuhrx, 16, 0)
1667 VEXTU_X_DO(vextuwrx, 32, 0)
1668 #undef VEXTU_X_DO
1669
1670 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1671 {
1672 int i;
1673 unsigned int shift, bytes, size;
1674
1675 size = ARRAY_SIZE(r->u8);
1676 for (i = 0; i < size; i++) {
1677 shift = b->VsrB(i) & 0x7; /* extract shift value */
1678 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1679 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1680 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1681 }
1682 }
1683
1684 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1685 {
1686 int i;
1687 unsigned int shift, bytes;
1688
1689 /*
1690  * Use reverse order, as the destination and source registers can be
1691  * the same. The register is modified in place (saving a temporary), so
1692  * reverse order guarantees that computed results are not fed back in.
1693 */
1694 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1695 shift = b->VsrB(i) & 0x7; /* extract shift value */
1696 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1697 /* extract adjacent bytes */
1698 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1699 }
1700 }
1701
1702 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1703 {
1704 int sh = shift & 0xf;
1705 int i;
1706 ppc_avr_t result;
1707
1708 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1709 int index = sh + i;
1710 if (index > 0xf) {
1711 result.VsrB(i) = b->VsrB(index - 0x10);
1712 } else {
1713 result.VsrB(i) = a->VsrB(index);
1714 }
1715 }
1716 *r = result;
1717 }
1718
1719 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1720 {
1721 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1722
1723 #if HOST_BIG_ENDIAN
1724 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1725 memset(&r->u8[16 - sh], 0, sh);
1726 #else
1727 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1728 memset(&r->u8[0], 0, sh);
1729 #endif
1730 }
1731
1732 #if HOST_BIG_ENDIAN
1733 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1734 #else
1735 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1736 #endif
1737
1738 #define VINSX(SUFFIX, TYPE) \
1739 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1740 uint64_t val, target_ulong index) \
1741 { \
1742 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1743 target_long idx = index; \
1744 \
1745 if (idx < 0 || idx > maxidx) { \
1746 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1747 qemu_log_mask(LOG_GUEST_ERROR, \
1748 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1749 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1750 } else { \
1751 TYPE src = val; \
1752 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1753 } \
1754 }
1755 VINSX(B, uint8_t)
1756 VINSX(H, uint16_t)
1757 VINSX(W, uint32_t)
1758 VINSX(D, uint64_t)
1759 #undef ELEM_ADDR
1760 #undef VINSX
1761 #if HOST_BIG_ENDIAN
1762 #define VEXTDVLX(NAME, SIZE) \
1763 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1764 target_ulong index) \
1765 { \
1766 const target_long idx = index; \
1767 ppc_avr_t tmp[2] = { *a, *b }; \
1768 memset(t, 0, sizeof(*t)); \
1769 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1770 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1771 } else { \
1772 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1773 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1774 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1775 } \
1776 }
1777 #else
1778 #define VEXTDVLX(NAME, SIZE) \
1779 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1780 target_ulong index) \
1781 { \
1782 const target_long idx = index; \
1783 ppc_avr_t tmp[2] = { *b, *a }; \
1784 memset(t, 0, sizeof(*t)); \
1785 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1786 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1787 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1788 } else { \
1789 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1790 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1791 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1792 } \
1793 }
1794 #endif
1795 VEXTDVLX(VEXTDUBVLX, 1)
1796 VEXTDVLX(VEXTDUHVLX, 2)
1797 VEXTDVLX(VEXTDUWVLX, 4)
1798 VEXTDVLX(VEXTDDVLX, 8)
1799 #undef VEXTDVLX
1800 #if HOST_BIG_ENDIAN
1801 #define VEXTRACT(suffix, element) \
1802 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1803 { \
1804 uint32_t es = sizeof(r->element[0]); \
1805 memmove(&r->u8[8 - es], &b->u8[index], es); \
1806 memset(&r->u8[8], 0, 8); \
1807 memset(&r->u8[0], 0, 8 - es); \
1808 }
1809 #else
1810 #define VEXTRACT(suffix, element) \
1811 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1812 { \
1813 uint32_t es = sizeof(r->element[0]); \
1814 uint32_t s = (16 - index) - es; \
1815 memmove(&r->u8[8], &b->u8[s], es); \
1816 memset(&r->u8[0], 0, 8); \
1817 memset(&r->u8[8 + es], 0, 8 - es); \
1818 }
1819 #endif
1820 VEXTRACT(ub, u8)
1821 VEXTRACT(uh, u16)
1822 VEXTRACT(uw, u32)
1823 VEXTRACT(d, u64)
1824 #undef VEXTRACT
1825
1826 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1827 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1828 { \
1829 int i, idx, crf = 0; \
1830 \
1831 for (i = 0; i < NUM_ELEMS; i++) { \
1832 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1833 if (b->Vsr##ELEM(idx)) { \
1834 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1835 } else { \
1836 crf = 0b0010; \
1837 break; \
1838 } \
1839 } \
1840 \
1841 for (; i < NUM_ELEMS; i++) { \
1842 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1843 t->Vsr##ELEM(idx) = 0; \
1844 } \
1845 \
1846 return crf; \
1847 }
1848 VSTRI(VSTRIBL, B, 16, true)
1849 VSTRI(VSTRIBR, B, 16, false)
1850 VSTRI(VSTRIHL, H, 8, true)
1851 VSTRI(VSTRIHR, H, 8, false)
1852 #undef VSTRI
1853
1854 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1855 {
1856 ppc_vsr_t t = { };
1857 size_t es = sizeof(uint32_t);
1858 uint32_t ext_index;
1859 int i;
1860
1861 ext_index = index;
1862 for (i = 0; i < es; i++, ext_index++) {
1863 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1864 }
1865
1866 *xt = t;
1867 }
1868
1869 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1870 {
1871 ppc_vsr_t t = *xt;
1872 size_t es = sizeof(uint32_t);
1873 int ins_index, i = 0;
1874
1875 ins_index = index;
1876 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1877 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1878 }
1879
1880 *xt = t;
1881 }
1882
1883 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1884 uint32_t desc)
1885 {
1886 /*
1887 * Instead of processing imm bit-by-bit, we'll skip the computation of
1888 * conjunctions whose corresponding bit is unset.
1889 */
1890 int bit, imm = simd_data(desc);
1891 Int128 conj, disj = int128_zero();
1892
1893 /* Iterate over set bits from the least to the most significant bit */
1894 while (imm) {
1895 /*
1896  * Get the next bit to be processed with ctzl. Invert the result of
1897  * ctzl to match the big-endian bit indexing used by PowerISA.
1898 */
1899 bit = 7 - ctzl(imm);
1900 if (bit & 0x4) {
1901 conj = a->s128;
1902 } else {
1903 conj = int128_not(a->s128);
1904 }
1905 if (bit & 0x2) {
1906 conj = int128_and(conj, b->s128);
1907 } else {
1908 conj = int128_and(conj, int128_not(b->s128));
1909 }
1910 if (bit & 0x1) {
1911 conj = int128_and(conj, c->s128);
1912 } else {
1913 conj = int128_and(conj, int128_not(c->s128));
1914 }
1915 disj = int128_or(disj, conj);
1916
1917 /* Unset the least significant bit that is set */
1918 imm &= imm - 1;
1919 }
1920
1921 t->s128 = disj;
1922 }
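/*
 * Illustrative example (derived from the loop above): imm bit 0 selects the
 * conjunction with bit pattern 0b111, so imm = 0x01 computes A & B & C
 * bitwise, while imm = 0x80 selects ~A & ~B & ~C, i.e. a bitwise NOR of the
 * three operands. Multiple set bits in imm simply OR the selected
 * conjunctions together.
 */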
1923
1924 #define XXBLEND(name, sz) \
1925 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1926 ppc_avr_t *c, uint32_t desc) \
1927 { \
1928 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1929 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1930 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1931 } \
1932 }
1933 XXBLEND(B, 8)
1934 XXBLEND(H, 16)
1935 XXBLEND(W, 32)
1936 XXBLEND(D, 64)
1937 #undef XXBLEND
1938
1939 #define VNEG(name, element) \
1940 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1941 { \
1942 int i; \
1943 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1944 r->element[i] = -b->element[i]; \
1945 } \
1946 }
1947 VNEG(vnegw, s32)
1948 VNEG(vnegd, s64)
1949 #undef VNEG
1950
1951 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1952 {
1953 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1954
1955 #if HOST_BIG_ENDIAN
1956 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1957 memset(&r->u8[0], 0, sh);
1958 #else
1959 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1960 memset(&r->u8[16 - sh], 0, sh);
1961 #endif
1962 }
1963
1964 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1965 {
1966 int i;
1967
1968 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1969 r->u32[i] = a->u32[i] >= b->u32[i];
1970 }
1971 }
1972
1973 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1974 {
1975 int64_t t;
1976 int i, upper;
1977 ppc_avr_t result;
1978 int sat = 0;
1979
1980 upper = ARRAY_SIZE(r->s32) - 1;
1981 t = (int64_t)b->VsrSW(upper);
1982 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1983 t += a->VsrSW(i);
1984 result.VsrSW(i) = 0;
1985 }
1986 result.VsrSW(upper) = cvtsdsw(t, &sat);
1987 *r = result;
1988
1989 if (sat) {
1990 set_vscr_sat(env);
1991 }
1992 }
1993
1994 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1995 {
1996 int i, j, upper;
1997 ppc_avr_t result;
1998 int sat = 0;
1999
2000 upper = 1;
2001 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2002 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
2003
2004 result.VsrD(i) = 0;
2005 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2006 t += a->VsrSW(2 * i + j);
2007 }
2008 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2009 }
2010
2011 *r = result;
2012 if (sat) {
2013 set_vscr_sat(env);
2014 }
2015 }
2016
2017 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2018 {
2019 int i, j;
2020 int sat = 0;
2021
2022 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2023 int64_t t = (int64_t)b->s32[i];
2024
2025 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2026 t += a->s8[4 * i + j];
2027 }
2028 r->s32[i] = cvtsdsw(t, &sat);
2029 }
2030
2031 if (sat) {
2032 set_vscr_sat(env);
2033 }
2034 }
2035
2036 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2037 {
2038 int sat = 0;
2039 int i;
2040
2041 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2042 int64_t t = (int64_t)b->s32[i];
2043
2044 t += a->s16[2 * i] + a->s16[2 * i + 1];
2045 r->s32[i] = cvtsdsw(t, &sat);
2046 }
2047
2048 if (sat) {
2049 set_vscr_sat(env);
2050 }
2051 }
2052
2053 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2054 {
2055 int i, j;
2056 int sat = 0;
2057
2058 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2059 uint64_t t = (uint64_t)b->u32[i];
2060
2061 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2062 t += a->u8[4 * i + j];
2063 }
2064 r->u32[i] = cvtuduw(t, &sat);
2065 }
2066
2067 if (sat) {
2068 set_vscr_sat(env);
2069 }
2070 }
2071
2072 #if HOST_BIG_ENDIAN
2073 #define UPKHI 1
2074 #define UPKLO 0
2075 #else
2076 #define UPKHI 0
2077 #define UPKLO 1
2078 #endif
2079 #define VUPKPX(suffix, hi) \
2080 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2081 { \
2082 int i; \
2083 ppc_avr_t result; \
2084 \
2085 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2086 uint16_t e = b->u16[hi ? i : i + 4]; \
2087 uint8_t a = (e >> 15) ? 0xff : 0; \
2088 uint8_t r = (e >> 10) & 0x1f; \
2089 uint8_t g = (e >> 5) & 0x1f; \
2090 uint8_t b = e & 0x1f; \
2091 \
2092 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2093 } \
2094 *r = result; \
2095 }
2096 VUPKPX(lpx, UPKLO)
2097 VUPKPX(hpx, UPKHI)
2098 #undef VUPKPX
2099
2100 #define VUPK(suffix, unpacked, packee, hi) \
2101 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2102 { \
2103 int i; \
2104 ppc_avr_t result; \
2105 \
2106 if (hi) { \
2107 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2108 result.unpacked[i] = b->packee[i]; \
2109 } \
2110 } else { \
2111 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2112 i++) { \
2113 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2114 } \
2115 } \
2116 *r = result; \
2117 }
2118 VUPK(hsb, s16, s8, UPKHI)
2119 VUPK(hsh, s32, s16, UPKHI)
2120 VUPK(hsw, s64, s32, UPKHI)
2121 VUPK(lsb, s16, s8, UPKLO)
2122 VUPK(lsh, s32, s16, UPKLO)
2123 VUPK(lsw, s64, s32, UPKLO)
2124 #undef VUPK
2125 #undef UPKHI
2126 #undef UPKLO
2127
2128 #define VGENERIC_DO(name, element) \
2129 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2130 { \
2131 int i; \
2132 \
2133 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2134 r->element[i] = name(b->element[i]); \
2135 } \
2136 }
2137
2138 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2139 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2140
2141 VGENERIC_DO(clzb, u8)
2142 VGENERIC_DO(clzh, u16)
2143
2144 #undef clzb
2145 #undef clzh
2146
2147 #define ctzb(v) ((v) ? ctz32(v) : 8)
2148 #define ctzh(v) ((v) ? ctz32(v) : 16)
2149 #define ctzw(v) ctz32((v))
2150 #define ctzd(v) ctz64((v))
2151
2152 VGENERIC_DO(ctzb, u8)
2153 VGENERIC_DO(ctzh, u16)
2154 VGENERIC_DO(ctzw, u32)
2155 VGENERIC_DO(ctzd, u64)
2156
2157 #undef ctzb
2158 #undef ctzh
2159 #undef ctzw
2160 #undef ctzd
2161
2162 #define popcntb(v) ctpop8(v)
2163 #define popcnth(v) ctpop16(v)
2164 #define popcntw(v) ctpop32(v)
2165 #define popcntd(v) ctpop64(v)
2166
2167 VGENERIC_DO(popcntb, u8)
2168 VGENERIC_DO(popcnth, u16)
2169 VGENERIC_DO(popcntw, u32)
2170 VGENERIC_DO(popcntd, u64)
2171
2172 #undef popcntb
2173 #undef popcnth
2174 #undef popcntw
2175 #undef popcntd
2176
2177 #undef VGENERIC_DO
2178
2179 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2180 {
2181 r->s128 = int128_add(a->s128, b->s128);
2182 }
2183
2184 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2185 {
2186 r->s128 = int128_add(int128_add(a->s128, b->s128),
2187 int128_make64(int128_getlo(c->s128) & 1));
2188 }
2189
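/*
 * VADDCUQ: write the carry out of the 128-bit addition a + b.  The carry
 * is set exactly when b > ~a, i.e. when a + b wraps past 2^128 - 1.
 */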
2190 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2191 {
2192 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
2193 r->VsrD(0) = 0;
2194 }
2195
2196 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2197 {
2198 bool carry_out = int128_ult(int128_not(a->s128), b->s128),
2199 carry_in = int128_getlo(c->s128) & 1;
2200
2201 if (!carry_out && carry_in) {
2202 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
2203 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
2204 }
2205
2206 r->VsrD(0) = 0;
2207 r->VsrD(1) = carry_out;
2208 }
2209
2210 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2211 {
2212 r->s128 = int128_sub(a->s128, b->s128);
2213 }
2214
2215 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2216 {
2217 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
2218 int128_make64(int128_getlo(c->s128) & 1));
2219 }
2220
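/*
 * VSUBCUQ: carry (i.e. NOT borrow) out of the 128-bit subtraction a - b,
 * computed as the carry out of a + ~b + 1.
 */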
2221 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2222 {
2223 Int128 tmp = int128_not(b->s128);
2224
2225 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
2226 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
2227 r->VsrD(0) = 0;
2228 }
2229
2230 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2231 {
2232 Int128 tmp = int128_not(b->s128);
2233 bool carry_out = int128_ult(int128_not(a->s128), tmp),
2234 carry_in = int128_getlo(c->s128) & 1;
2235
2236 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
2237 int128_makes64(-1)));
2238 r->VsrD(0) = 0;
2239 }
2240
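/*
 * Packed BCD helpers.  A packed BCD operand holds 31 decimal digits plus a
 * sign code in the least significant nibble: digit n occupies nibble n
 * counting from the least significant end, so digit 0 is the sign, digit 1
 * the units digit, and digit 31 the most significant digit.
 */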
2241 #define BCD_PLUS_PREF_1 0xC
2242 #define BCD_PLUS_PREF_2 0xF
2243 #define BCD_PLUS_ALT_1 0xA
2244 #define BCD_NEG_PREF 0xD
2245 #define BCD_NEG_ALT 0xB
2246 #define BCD_PLUS_ALT_2 0xE
2247 #define NATIONAL_PLUS 0x2B
2248 #define NATIONAL_NEG 0x2D
2249
2250 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2251
2252 static int bcd_get_sgn(ppc_avr_t *bcd)
2253 {
2254 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2255 case BCD_PLUS_PREF_1:
2256 case BCD_PLUS_PREF_2:
2257 case BCD_PLUS_ALT_1:
2258 case BCD_PLUS_ALT_2:
2259 {
2260 return 1;
2261 }
2262
2263 case BCD_NEG_PREF:
2264 case BCD_NEG_ALT:
2265 {
2266 return -1;
2267 }
2268
2269 default:
2270 {
2271 return 0;
2272 }
2273 }
2274 }
2275
2276 static int bcd_preferred_sgn(int sgn, int ps)
2277 {
2278 if (sgn >= 0) {
2279 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2280 } else {
2281 return BCD_NEG_PREF;
2282 }
2283 }
2284
2285 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2286 {
2287 uint8_t result;
2288 if (n & 1) {
2289 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2290 } else {
2291 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2292 }
2293
2294 if (unlikely(result > 9)) {
2295 *invalid = true;
2296 }
2297 return result;
2298 }
2299
2300 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2301 {
2302 if (n & 1) {
2303 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2304 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2305 } else {
2306 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2307 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2308 }
2309 }
2310
2311 static bool bcd_is_valid(ppc_avr_t *bcd)
2312 {
2313 int i;
2314 int invalid = 0;
2315
2316 if (bcd_get_sgn(bcd) == 0) {
2317 return false;
2318 }
2319
2320 for (i = 1; i < 32; i++) {
2321 bcd_get_digit(bcd, i, &invalid);
2322 if (unlikely(invalid)) {
2323 return false;
2324 }
2325 }
2326 return true;
2327 }
2328
2329 static int bcd_cmp_zero(ppc_avr_t *bcd)
2330 {
2331 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2332 return CRF_EQ;
2333 } else {
2334 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2335 }
2336 }
2337
2338 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2339 {
2340 return reg->VsrH(7 - n);
2341 }
2342
2343 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2344 {
2345 reg->VsrH(7 - n) = val;
2346 }
2347
2348 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2349 {
2350 int i;
2351 int invalid = 0;
2352 for (i = 31; i > 0; i--) {
2353 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2354 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2355 if (unlikely(invalid)) {
2356 return 0; /* doesn't matter */
2357 } else if (dig_a > dig_b) {
2358 return 1;
2359 } else if (dig_a < dig_b) {
2360 return -1;
2361 }
2362 }
2363
2364 return 0;
2365 }
2366
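/*
 * Digit-by-digit magnitude addition of two packed BCD operands (digits
 * 1..31; the sign nibble is untouched).  Returns non-zero if every result
 * digit is zero and reports a carry out of the top digit via *overflow.
 */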
2367 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2368 int *overflow)
2369 {
2370 int carry = 0;
2371 int i;
2372 int is_zero = 1;
2373
2374 for (i = 1; i <= 31; i++) {
2375 uint8_t digit = bcd_get_digit(a, i, invalid) +
2376 bcd_get_digit(b, i, invalid) + carry;
2377 is_zero &= (digit == 0);
2378 if (digit > 9) {
2379 carry = 1;
2380 digit -= 10;
2381 } else {
2382 carry = 0;
2383 }
2384
2385 bcd_put_digit(t, digit, i);
2386 }
2387
2388 *overflow = carry;
2389 return is_zero;
2390 }
2391
2392 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2393 int *overflow)
2394 {
2395 int carry = 0;
2396 int i;
2397
2398 for (i = 1; i <= 31; i++) {
2399 uint8_t digit = bcd_get_digit(a, i, invalid) -
2400 bcd_get_digit(b, i, invalid) + carry;
2401 if (digit & 0x80) {
2402 carry = -1;
2403 digit += 10;
2404 } else {
2405 carry = 0;
2406 }
2407
2408 bcd_put_digit(t, digit, i);
2409 }
2410
2411 *overflow = carry;
2412 }
2413
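/*
 * bcdadd: add two signed packed BCD values, with ps selecting the
 * preferred plus-sign encoding.  The returned CR field reports the sign of
 * the result (GT/LT), EQ for zero, and SO on invalid input or decimal
 * overflow.
 */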
2414 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2415 {
2416
2417 int sgna = bcd_get_sgn(a);
2418 int sgnb = bcd_get_sgn(b);
2419 int invalid = (sgna == 0) || (sgnb == 0);
2420 int overflow = 0;
2421 int zero = 0;
2422 uint32_t cr = 0;
2423 ppc_avr_t result = { .u64 = { 0, 0 } };
2424
2425 if (!invalid) {
2426 if (sgna == sgnb) {
2427 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2428 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2429 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2430 } else {
2431 int magnitude = bcd_cmp_mag(a, b);
2432 if (magnitude > 0) {
2433 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2434 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2435 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2436 } else if (magnitude < 0) {
2437 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2438 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2439 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2440 } else {
2441 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2442 cr = CRF_EQ;
2443 }
2444 }
2445 }
2446
2447 if (unlikely(invalid)) {
2448 result.VsrD(0) = result.VsrD(1) = -1;
2449 cr = CRF_SO;
2450 } else if (overflow) {
2451 cr |= CRF_SO;
2452 } else if (zero) {
2453 cr |= CRF_EQ;
2454 }
2455
2456 *r = result;
2457
2458 return cr;
2459 }
2460
2461 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2462 {
2463 ppc_avr_t bcopy = *b;
2464 int sgnb = bcd_get_sgn(b);
2465 if (sgnb < 0) {
2466 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2467 } else if (sgnb > 0) {
2468 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2469 }
2470 /* else invalid ... defer to bcdadd code for proper handling */
2471
2472 return helper_bcdadd(r, a, &bcopy, ps);
2473 }
2474
2475 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2476 {
2477 int i;
2478 int cr = 0;
2479 uint16_t national = 0;
2480 uint16_t sgnb = get_national_digit(b, 0);
2481 ppc_avr_t ret = { .u64 = { 0, 0 } };
2482 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2483
2484 for (i = 1; i < 8; i++) {
2485 national = get_national_digit(b, i);
2486 if (unlikely(national < 0x30 || national > 0x39)) {
2487 invalid = 1;
2488 break;
2489 }
2490
2491 bcd_put_digit(&ret, national & 0xf, i);
2492 }
2493
2494 if (sgnb == NATIONAL_PLUS) {
2495 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2496 } else {
2497 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2498 }
2499
2500 cr = bcd_cmp_zero(&ret);
2501
2502 if (unlikely(invalid)) {
2503 cr = CRF_SO;
2504 }
2505
2506 *r = ret;
2507
2508 return cr;
2509 }
2510
2511 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2512 {
2513 int i;
2514 int cr = 0;
2515 int sgnb = bcd_get_sgn(b);
2516 int invalid = (sgnb == 0);
2517 ppc_avr_t ret = { .u64 = { 0, 0 } };
2518
2519 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2520
2521 for (i = 1; i < 8; i++) {
2522 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2523
2524 if (unlikely(invalid)) {
2525 break;
2526 }
2527 }
2528 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2529
2530 cr = bcd_cmp_zero(b);
2531
2532 if (ox_flag) {
2533 cr |= CRF_SO;
2534 }
2535
2536 if (unlikely(invalid)) {
2537 cr = CRF_SO;
2538 }
2539
2540 *r = ret;
2541
2542 return cr;
2543 }
2544
2545 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2546 {
2547 int i;
2548 int cr = 0;
2549 int invalid = 0;
2550 int zone_digit = 0;
2551 int zone_lead = ps ? 0xF : 0x3;
2552 int digit = 0;
2553 ppc_avr_t ret = { .u64 = { 0, 0 } };
2554 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2555
2556 if (unlikely((sgnb < 0xA) && ps)) {
2557 invalid = 1;
2558 }
2559
2560 for (i = 0; i < 16; i++) {
2561 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2562 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2563 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2564 invalid = 1;
2565 break;
2566 }
2567
2568 bcd_put_digit(&ret, digit, i + 1);
2569 }
2570
2571 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2572 (!ps && (sgnb & 0x4))) {
2573 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2574 } else {
2575 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2576 }
2577
2578 cr = bcd_cmp_zero(&ret);
2579
2580 if (unlikely(invalid)) {
2581 cr = CRF_SO;
2582 }
2583
2584 *r = ret;
2585
2586 return cr;
2587 }
2588
2589 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2590 {
2591 int i;
2592 int cr = 0;
2593 uint8_t digit = 0;
2594 int sgnb = bcd_get_sgn(b);
2595 int zone_lead = (ps) ? 0xF0 : 0x30;
2596 int invalid = (sgnb == 0);
2597 ppc_avr_t ret = { .u64 = { 0, 0 } };
2598
2599 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2600
2601 for (i = 0; i < 16; i++) {
2602 digit = bcd_get_digit(b, i + 1, &invalid);
2603
2604 if (unlikely(invalid)) {
2605 break;
2606 }
2607
2608 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2609 }
2610
2611 if (ps) {
2612 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2613 } else {
2614 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2615 }
2616
2617 cr = bcd_cmp_zero(b);
2618
2619 if (ox_flag) {
2620 cr |= CRF_SO;
2621 }
2622
2623 if (unlikely(invalid)) {
2624 cr = CRF_SO;
2625 }
2626
2627 *r = ret;
2628
2629 return cr;
2630 }
2631
2632 /**
2633 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2634 *
2635 * Returns:
2636 * > 0 if ahi|alo > bhi|blo,
2637 * 0 if ahi|alo == bhi|blo,
2638 * < 0 if ahi|alo < bhi|blo
2639 */
2640 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2641 uint64_t blo, uint64_t bhi)
2642 {
2643 return (ahi == bhi) ?
2644 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2645 (ahi > bhi ? 1 : -1);
2646 }
2647
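/*
 * bcdcfsq: convert a signed 128-bit binary integer to packed BCD.  Values
 * whose magnitude needs more than 31 decimal digits set CRF_SO and leave
 * the target register unchanged.
 */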
2648 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2649 {
2650 int i;
2651 int cr;
2652 uint64_t lo_value;
2653 uint64_t hi_value;
2654 uint64_t rem;
2655 ppc_avr_t ret = { .u64 = { 0, 0 } };
2656
2657 if (b->VsrSD(0) < 0) {
2658 lo_value = -b->VsrSD(1);
2659 hi_value = ~b->VsrD(0) + !lo_value;
2660 bcd_put_digit(&ret, 0xD, 0);
2661
2662 cr = CRF_LT;
2663 } else {
2664 lo_value = b->VsrD(1);
2665 hi_value = b->VsrD(0);
2666 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2667
2668 if (hi_value == 0 && lo_value == 0) {
2669 cr = CRF_EQ;
2670 } else {
2671 cr = CRF_GT;
2672 }
2673 }
2674
2675 /*
2676 * Check src limits: abs(src) <= 10^31 - 1
2677 *
2678 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2679 */
2680 if (ucmp128(lo_value, hi_value,
2681 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2682 cr |= CRF_SO;
2683
2684 /*
2685 * According to the ISA, if src wouldn't fit in the destination
2686 * register, the result is undefined.
2687 * In that case, we leave r unchanged.
2688 */
2689 } else {
2690 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2691
2692 for (i = 1; i < 16; rem /= 10, i++) {
2693 bcd_put_digit(&ret, rem % 10, i);
2694 }
2695
2696 for (; i < 32; lo_value /= 10, i++) {
2697 bcd_put_digit(&ret, lo_value % 10, i);
2698 }
2699
2700 *r = ret;
2701 }
2702
2703 return cr;
2704 }
2705
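/*
 * bcdctsq: convert a packed BCD value to a signed 128-bit binary integer,
 * accumulating the digits most-significant first via 128-bit
 * multiply-by-10 steps.
 */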
2706 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2707 {
2708 uint8_t i;
2709 int cr;
2710 uint64_t carry;
2711 uint64_t unused;
2712 uint64_t lo_value;
2713 uint64_t hi_value = 0;
2714 int sgnb = bcd_get_sgn(b);
2715 int invalid = (sgnb == 0);
2716
2717 lo_value = bcd_get_digit(b, 31, &invalid);
2718 for (i = 30; i > 0; i--) {
2719 mulu64(&lo_value, &carry, lo_value, 10ULL);
2720 mulu64(&hi_value, &unused, hi_value, 10ULL);
2721 lo_value += bcd_get_digit(b, i, &invalid);
2722 hi_value += carry;
2723
2724 if (unlikely(invalid)) {
2725 break;
2726 }
2727 }
2728
2729 if (sgnb == -1) {
2730 r->VsrSD(1) = -lo_value;
2731 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2732 } else {
2733 r->VsrSD(1) = lo_value;
2734 r->VsrSD(0) = hi_value;
2735 }
2736
2737 cr = bcd_cmp_zero(b);
2738
2739 if (unlikely(invalid)) {
2740 cr = CRF_SO;
2741 }
2742
2743 return cr;
2744 }
2745
2746 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2747 {
2748 int i;
2749 int invalid = 0;
2750
2751 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2752 return CRF_SO;
2753 }
2754
2755 *r = *a;
2756 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2757
2758 for (i = 1; i < 32; i++) {
2759 bcd_get_digit(a, i, &invalid);
2760 bcd_get_digit(b, i, &invalid);
2761 if (unlikely(invalid)) {
2762 return CRF_SO;
2763 }
2764 }
2765
2766 return bcd_cmp_zero(r);
2767 }
2768
2769 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2770 {
2771 int sgnb = bcd_get_sgn(b);
2772
2773 *r = *b;
2774 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2775
2776 if (bcd_is_valid(b) == false) {
2777 return CRF_SO;
2778 }
2779
2780 return bcd_cmp_zero(r);
2781 }
2782
2783 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2784 {
2785 int cr;
2786 int i = a->VsrSB(7);
2787 bool ox_flag = false;
2788 int sgnb = bcd_get_sgn(b);
2789 ppc_avr_t ret = *b;
2790 ret.VsrD(1) &= ~0xf;
2791
2792 if (bcd_is_valid(b) == false) {
2793 return CRF_SO;
2794 }
2795
2796 if (unlikely(i > 31)) {
2797 i = 31;
2798 } else if (unlikely(i < -31)) {
2799 i = -31;
2800 }
2801
2802 if (i > 0) {
2803 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2804 } else {
2805 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2806 }
2807 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2808
2809 *r = ret;
2810
2811 cr = bcd_cmp_zero(r);
2812 if (ox_flag) {
2813 cr |= CRF_SO;
2814 }
2815
2816 return cr;
2817 }
2818
2819 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2820 {
2821 int cr;
2822 int i;
2823 int invalid = 0;
2824 bool ox_flag = false;
2825 ppc_avr_t ret = *b;
2826
2827 for (i = 0; i < 32; i++) {
2828 bcd_get_digit(b, i, &invalid);
2829
2830 if (unlikely(invalid)) {
2831 return CRF_SO;
2832 }
2833 }
2834
2835 i = a->VsrSB(7);
2836 if (i >= 32) {
2837 ox_flag = true;
2838 ret.VsrD(1) = ret.VsrD(0) = 0;
2839 } else if (i <= -32) {
2840 ret.VsrD(1) = ret.VsrD(0) = 0;
2841 } else if (i > 0) {
2842 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2843 } else {
2844 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2845 }
2846 *r = ret;
2847
2848 cr = bcd_cmp_zero(r);
2849 if (ox_flag) {
2850 cr |= CRF_SO;
2851 }
2852
2853 return cr;
2854 }
2855
2856 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2857 {
2858 int cr;
2859 int unused = 0;
2860 int invalid = 0;
2861 bool ox_flag = false;
2862 int sgnb = bcd_get_sgn(b);
2863 ppc_avr_t ret = *b;
2864 ret.VsrD(1) &= ~0xf;
2865
2866 int i = a->VsrSB(7);
2867 ppc_avr_t bcd_one;
2868
2869 bcd_one.VsrD(0) = 0;
2870 bcd_one.VsrD(1) = 0x10;
2871
2872 if (bcd_is_valid(b) == false) {
2873 return CRF_SO;
2874 }
2875
2876 if (unlikely(i > 31)) {
2877 i = 31;
2878 } else if (unlikely(i < -31)) {
2879 i = -31;
2880 }
2881
2882 if (i > 0) {
2883 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2884 } else {
2885 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2886
2887 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2888 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2889 }
2890 }
2891 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2892
2893 cr = bcd_cmp_zero(&ret);
2894 if (ox_flag) {
2895 cr |= CRF_SO;
2896 }
2897 *r = ret;
2898
2899 return cr;
2900 }
2901
2902 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2903 {
2904 uint64_t mask;
2905 uint32_t ox_flag = 0;
2906 int i = a->VsrSH(3) + 1;
2907 ppc_avr_t ret = *b;
2908
2909 if (bcd_is_valid(b) == false) {
2910 return CRF_SO;
2911 }
2912
2913 if (i > 16 && i < 32) {
2914 mask = (uint64_t)-1 >> (128 - i * 4);
2915 if (ret.VsrD(0) & ~mask) {
2916 ox_flag = CRF_SO;
2917 }
2918
2919 ret.VsrD(0) &= mask;
2920 } else if (i >= 0 && i <= 16) {
2921 mask = (uint64_t)-1 >> (64 - i * 4);
2922 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2923 ox_flag = CRF_SO;
2924 }
2925
2926 ret.VsrD(1) &= mask;
2927 ret.VsrD(0) = 0;
2928 }
2929 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2930 *r = ret;
2931
2932 return bcd_cmp_zero(&ret) | ox_flag;
2933 }
2934
2935 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2936 {
2937 int i;
2938 uint64_t mask;
2939 uint32_t ox_flag = 0;
2940 int invalid = 0;
2941 ppc_avr_t ret = *b;
2942
2943 for (i = 0; i < 32; i++) {
2944 bcd_get_digit(b, i, &invalid);
2945
2946 if (unlikely(invalid)) {
2947 return CRF_SO;
2948 }
2949 }
2950
2951 i = a->VsrSH(3);
2952 if (i > 16 && i < 33) {
2953 mask = (uint64_t)-1 >> (128 - i * 4);
2954 if (ret.VsrD(0) & ~mask) {
2955 ox_flag = CRF_SO;
2956 }
2957
2958 ret.VsrD(0) &= mask;
2959 } else if (i > 0 && i <= 16) {
2960 mask = (uint64_t)-1 >> (64 - i * 4);
2961 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2962 ox_flag = CRF_SO;
2963 }
2964
2965 ret.VsrD(1) &= mask;
2966 ret.VsrD(0) = 0;
2967 } else if (i == 0) {
2968 if (ret.VsrD(0) || ret.VsrD(1)) {
2969 ox_flag = CRF_SO;
2970 }
2971 ret.VsrD(0) = ret.VsrD(1) = 0;
2972 }
2973
2974 *r = ret;
2975 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2976 return ox_flag | CRF_EQ;
2977 }
2978
2979 return ox_flag | CRF_GT;
2980 }
2981
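/* vsbox: apply the AES S-box to every byte (the AES SubBytes step). */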
2982 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2983 {
2984 int i;
2985 VECTOR_FOR_INORDER_I(i, u8) {
2986 r->u8[i] = AES_sbox[a->u8[i]];
2987 }
2988 }
2989
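/*
 * vcipher: one full AES encryption round.  The AES_Te* tables combine
 * SubBytes, ShiftRows and MixColumns; the round key in b is added by the
 * final XOR.
 */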
2990 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2991 {
2992 ppc_avr_t result;
2993 int i;
2994
2995 VECTOR_FOR_INORDER_I(i, u32) {
2996 result.VsrW(i) = b->VsrW(i) ^
2997 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2998 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2999 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
3000 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
3001 }
3002 *r = result;
3003 }
3004
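/* vcipherlast: final AES round (SubBytes + ShiftRows, no MixColumns). */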
3005 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3006 {
3007 ppc_avr_t result;
3008 int i;
3009
3010 VECTOR_FOR_INORDER_I(i, u8) {
3011 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
3012 }
3013 *r = result;
3014 }
3015
3016 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3017 {
3018 /* This differs from what is written in ISA V2.07. The RTL is
3019  * incorrect and will be fixed in V2.07B. */
3020 int i;
3021 ppc_avr_t tmp;
3022
3023 VECTOR_FOR_INORDER_I(i, u8) {
3024 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
3025 }
3026
3027 VECTOR_FOR_INORDER_I(i, u32) {
3028 r->VsrW(i) =
3029 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
3030 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
3031 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
3032 AES_imc[tmp.VsrB(4 * i + 3)][3];
3033 }
3034 }
3035
3036 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3037 {
3038 ppc_avr_t result;
3039 int i;
3040
3041 VECTOR_FOR_INORDER_I(i, u8) {
3042 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
3043 }
3044 *r = result;
3045 }
3046
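/*
 * vshasigmaw: SHA-256 sigma functions.  Bit 4 of st_six selects between
 * the lower-case sigma (message schedule) and upper-case Sigma
 * (compression) variants; the low four bits choose sigma0 or sigma1 per
 * word element.
 */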
3047 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3048 {
3049 int st = (st_six & 0x10) != 0;
3050 int six = st_six & 0xF;
3051 int i;
3052
3053 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3054 if (st == 0) {
3055 if ((six & (0x8 >> i)) == 0) {
3056 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3057 ror32(a->VsrW(i), 18) ^
3058 (a->VsrW(i) >> 3);
3059 } else { /* six.bit[i] == 1 */
3060 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3061 ror32(a->VsrW(i), 19) ^
3062 (a->VsrW(i) >> 10);
3063 }
3064 } else { /* st == 1 */
3065 if ((six & (0x8 >> i)) == 0) {
3066 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3067 ror32(a->VsrW(i), 13) ^
3068 ror32(a->VsrW(i), 22);
3069 } else { /* six.bit[i] == 1 */
3070 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3071 ror32(a->VsrW(i), 11) ^
3072 ror32(a->VsrW(i), 25);
3073 }
3074 }
3075 }
3076 }
3077
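/* vshasigmad: as above, but the SHA-512 sigma functions on doublewords. */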
3078 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3079 {
3080 int st = (st_six & 0x10) != 0;
3081 int six = st_six & 0xF;
3082 int i;
3083
3084 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3085 if (st == 0) {
3086 if ((six & (0x8 >> (2 * i))) == 0) {
3087 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3088 ror64(a->VsrD(i), 8) ^
3089 (a->VsrD(i) >> 7);
3090 } else { /* six.bit[2*i] == 1 */
3091 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3092 ror64(a->VsrD(i), 61) ^
3093 (a->VsrD(i) >> 6);
3094 }
3095 } else { /* st == 1 */
3096 if ((six & (0x8 >> (2 * i))) == 0) {
3097 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3098 ror64(a->VsrD(i), 34) ^
3099 ror64(a->VsrD(i), 39);
3100 } else { /* six.bit[2*i] == 1 */
3101 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3102 ror64(a->VsrD(i), 18) ^
3103 ror64(a->VsrD(i), 41);
3104 }
3105 }
3106 }
3107 }
3108
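/*
 * vpermxor: for each byte, the two nibbles of c index a byte of a and a
 * byte of b respectively; the result byte is their XOR.
 */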
3109 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3110 {
3111 ppc_avr_t result;
3112 int i;
3113
3114 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3115 int indexA = c->VsrB(i) >> 4;
3116 int indexB = c->VsrB(i) & 0xF;
3117
3118 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3119 }
3120 *r = result;
3121 }
3122
3123 #undef VECTOR_FOR_INORDER_I
3124
3125 /*****************************************************************************/
3126 /* SPE extension helpers */
3127 /* Use a table to make this quicker */
3128 static const uint8_t hbrev[16] = {
3129 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3130 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3131 };
3132
3133 static inline uint8_t byte_reverse(uint8_t val)
3134 {
3135 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3136 }
3137
3138 static inline uint32_t word_reverse(uint32_t val)
3139 {
3140 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3141 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3142 }
3143
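/*
 * brinc: bit-reversed increment (SPE), used for FFT-style bit-reversed
 * addressing.  The low MASKBITS bits of arg1 are incremented as if their
 * bit order were reversed, under the mask supplied in arg2; bits above
 * MASKBITS are passed through from arg1 unchanged.
 */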
3144 #define MASKBITS 16 /* Arbitrary choice; the real width is implementation dependent */
3145 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3146 {
3147 uint32_t a, b, d, mask;
3148
3149 mask = UINT32_MAX >> (32 - MASKBITS);
3150 a = arg1 & mask;
3151 b = arg2 & mask;
3152 d = word_reverse(1 + word_reverse(a | ~b));
3153 return (arg1 & ~mask) | (d & b);
3154 }
3155
3156 uint32_t helper_cntlsw32(uint32_t val)
3157 {
3158 if (val & 0x80000000) {
3159 return clz32(~val);
3160 } else {
3161 return clz32(val);
3162 }
3163 }
3164
3165 uint32_t helper_cntlzw32(uint32_t val)
3166 {
3167 return clz32(val);
3168 }
3169
3170 /* 440 specific */
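/*
 * dlmzb: scan the eight bytes of high:low most-significant first for a
 * zero byte.  Returns the 1-based index of the first zero byte (8 if there
 * is none), writes it to the low bits of XER and, if update_Rc is set,
 * records the outcome in CR0.
 */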
3171 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3172 target_ulong low, uint32_t update_Rc)
3173 {
3174 target_ulong mask;
3175 int i;
3176
3177 i = 1;
3178 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3179 if ((high & mask) == 0) {
3180 if (update_Rc) {
3181 env->crf[0] = 0x4;
3182 }
3183 goto done;
3184 }
3185 i++;
3186 }
3187 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3188 if ((low & mask) == 0) {
3189 if (update_Rc) {
3190 env->crf[0] = 0x8;
3191 }
3192 goto done;
3193 }
3194 i++;
3195 }
3196 i = 8;
3197 if (update_Rc) {
3198 env->crf[0] = 0x2;
3199 }
3200 done:
3201 env->xer = (env->xer & ~0x7F) | i;
3202 if (update_Rc) {
3203 env->crf[0] |= xer_so;
3204 }
3205 return i;
3206 }