1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
32
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
36
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 {
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
43 }
44 }
45
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48 {
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
68 }
69
70 return (target_ulong)rt;
71 }
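
/*
 * Worked example for the extended divide above (values taken from the code,
 * not from any particular test): with RA = 1, RB = 4 and OE = 0, the dividend
 * is 1ULL << 32 = 0x100000000, the divisor is 4, and the quotient 0x40000000
 * fits in 32 bits, so no overflow is flagged. A zero divisor or a quotient
 * above UINT32_MAX returns 0 (the result is undefined by the ISA) and, when
 * OE is set, updates OV/SO via helper_update_ov_legacy().
 */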
72
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75 {
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
96 }
97
98 return (target_ulong)rt;
99 }
100
101 #if defined(TARGET_PPC64)
102
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 {
105 uint64_t rt = 0;
106 int overflow = 0;
107
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
113 }
114
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
117 }
118
119 return rt;
120 }
121
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 {
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
128
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
134 }
135
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
138 }
139
140 return rt;
141 }
142
143 #endif
144
145
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
150 /*
151 * Subtract 1 from each byte, AND with the inverse, and check whether the
152 * MSB is set in each byte.
153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 */
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 {
163 return hasvalue(rb, ra) ? CRF_GT : 0;
164 }
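
/*
 * For example, with ra = 0x42 and any byte of rb equal to 0x42,
 * rb ^ pattern(0x42) has a zero byte at that position, so haszero() is
 * non-zero and cmpeqb returns CRF_GT; otherwise it returns 0.
 */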
165
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
169
170 /*
171 * Return a random number, or all-ones if no host entropy is available.
172 */
173 uint64_t helper_darn32(void)
174 {
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
184
185 return ret;
186 }
187
188 uint64_t helper_darn64(void)
189 {
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
199
200 return ret;
201 }
202
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 {
205 int i;
206 uint64_t ra = 0;
207
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
213 }
214 }
215 }
216 return ra;
217 }
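
/*
 * In the loop above, byte i of 'rs' selects a bit of 'rb' using PowerPC bit
 * numbering (PPC_BIT(0) is the most-significant bit); if that bit is set,
 * bit i of the result is set, and indices >= 64 contribute 0.
 */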
218
219 #endif
220
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 {
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234 }
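
/*
 * Example on a 64-bit target: cmpb(0x0011223344556677, 0x00112233ffffffff)
 * returns 0xffffffff00000000: the four high bytes match (including the
 * equal zero bytes), the four low bytes do not.
 */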
235
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
239 {
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
250 }
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
254 }
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
258 }
259 return (target_long)ret;
260 }
261
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
265 {
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
276 }
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
280 }
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
284 }
285 return ret;
286 }
287 #endif
288
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
291 {
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300 }
301
302 target_ulong helper_popcntw(target_ulong val)
303 {
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316 }
317 #else
318 target_ulong helper_popcntb(target_ulong val)
319 {
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325 }
326 #endif
327
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
329 {
330 /*
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we'll handle it in
333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid choosing between
334 * ctz and cto, we negate the mask at the end of each iteration.
335 */
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
339
340 if (mask == 0 || mask == -1) {
341 return src;
342 }
343
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
350 }
351
352 /*
353 * Extract the 'n' trailing bits of 'src' and put them in the leading 'n'
354 * bits of 'right' or 'left', pushing down the previously extracted
355 * values.
356 */
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
362 }
363
364 /*
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
369 */
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
375 }
376
377 /*
378 * At the end, 'right' has been rotated right by a total of ctpop(mask) bits.
379 * To put it back in place, we shift it right by another 64 - ctpop(mask) bits.
380 */
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
385 }
386
387 return left | (right >> n);
388 }
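
/*
 * Example: helper_CFUGED(0xAB, 0xF0) returns 0xBA. The source bits selected
 * by the 1-bits of the mask (0xA, from bits 4..7) are gathered into the
 * low-order end of the result, and the remaining bits (0xB, from bits 0..3)
 * are gathered, in order, into the high-order end.
 */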
389
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 {
392 int i, o;
393 uint64_t result = 0;
394
395 if (mask == -1) {
396 return src;
397 }
398
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
403 }
404
405 return result;
406 }
407
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 {
410 int i, o;
411 uint64_t result = 0;
412
413 if (mask == -1) {
414 return src;
415 }
416
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
421 }
422
423 return result;
424 }
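
/*
 * These follow the same semantics as the familiar parallel deposit/extract
 * (PDEP/PEXT) bit operations. For example, helper_PEXTD(0xABCD, 0xF0)
 * gathers bits 4..7 of the source (0xC) into the low bits of the result,
 * and helper_PDEPD(0xC, 0xF0) scatters the low bits of the source back into
 * the mask positions, returning 0xC0.
 */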
425
426 /*****************************************************************************/
427 /* Altivec extension helpers */
428 #if HOST_BIG_ENDIAN
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
435
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
439 { \
440 to_type r; \
441 \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
450 } \
451 return r; \
452 }
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
455 { \
456 to_type r; \
457 \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
463 } \
464 return r; \
465 }
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
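
/*
 * Each generated cvt<from><to>() clamps its argument to the destination
 * range and records saturation, e.g. cvtsdsw(0x100000000LL, &sat) returns
 * INT32_MAX and sets sat, while cvtshub(-5, &sat) returns 0 and sets sat.
 */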
478
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 {
481 ppc_store_vscr(env, vscr);
482 }
483
484 uint32_t helper_mfvscr(CPUPPCState *env)
485 {
486 return ppc_get_vscr(env);
487 }
488
489 static inline void set_vscr_sat(CPUPPCState *env)
490 {
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
493 }
494
495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 {
497 int i;
498
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
501 }
502 }
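
/*
 * The comparison ~a < b is the usual carry-out test: it is true exactly
 * when a->u32[i] + b->u32[i] overflows 32 bits, so each element of the
 * result is the carry out of the corresponding word addition.
 */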
503
504 /* vprtybw */
505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 {
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
512 }
513 }
514
515 /* vprtybd */
516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 {
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
524 }
525 }
526
527 /* vprtybq */
528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 {
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
536 }
537
538 #define VARITHFP(suffix, func) \
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
541 { \
542 int i; \
543 \
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
546 } \
547 }
548 VARITHFP(addfp, float32_add)
549 VARITHFP(subfp, float32_sub)
550 VARITHFP(minfp, float32_min)
551 VARITHFP(maxfp, float32_max)
552 #undef VARITHFP
553
554 #define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
557 { \
558 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
562 } \
563 }
564 VARITHFPFMA(maddfp, 0);
565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566 #undef VARITHFPFMA
567
568 #define VARITHSAT_CASE(type, op, cvt, element) \
569 { \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
572 }
573
574 #define VARITHSAT_DO(name, op, optype, cvt, element) \
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 { \
578 int sat = 0; \
579 int i; \
580 \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
582 VARITHSAT_CASE(optype, op, cvt, element); \
583 } \
584 if (sat) { \
585 vscr_sat->u32[0] = 1; \
586 } \
587 }
588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600 #undef VARITHSAT_CASE
601 #undef VARITHSAT_DO
602 #undef VARITHSAT_SIGNED
603 #undef VARITHSAT_UNSIGNED
604
605 #define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 { \
608 int i; \
609 \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
613 } \
614 }
615
616 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620 VAVG(b, s8, int16_t, u8, uint16_t)
621 VAVG(h, s16, int32_t, u16, uint32_t)
622 VAVG(w, s32, int64_t, u32, uint64_t)
623 #undef VAVG_DO
624 #undef VAVG
625
626 #define VABSDU_DO(name, element) \
627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 int i; \
630 \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
635 } \
636 }
637
638 /*
639 * VABSDU - Vector absolute difference unsigned
640 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
642 */
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645 VABSDU(b, u8)
646 VABSDU(h, u16)
647 VABSDU(w, u32)
648 #undef VABSDU_DO
649 #undef VABSDU
650
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
654 { \
655 int i; \
656 \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
660 } \
661 }
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
664 #undef VCF
665
666 #define VCMPNEZ(NAME, ELEM) \
667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 { \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
672 } \
673 }
674 VCMPNEZ(VCMPNEZB, u8)
675 VCMPNEZ(VCMPNEZH, u16)
676 VCMPNEZ(VCMPNEZW, u32)
677 #undef VCMPNEZ
678
679 #define VCMPFP_DO(suffix, compare, order, record) \
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
682 { \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
687 \
688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
689 uint32_t result; \
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
699 } \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
703 } \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
706 } \
707 }
708 #define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
711 VCMPFP(eqfp, ==, float_relation_equal)
712 VCMPFP(gefp, !=, float_relation_less)
713 VCMPFP(gtfp, ==, float_relation_greater)
714 #undef VCMPFP_DO
715 #undef VCMPFP
716
717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
719 {
720 int i;
721 int all_in = 0;
722
723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 all_in = 1;
729 } else {
730 float32 bneg = float32_chs(b->f32[i]);
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
735
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
738 }
739 }
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
742 }
743 }
744
745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 0);
748 }
749
750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
751 ppc_avr_t *b)
752 {
753 vcmpbfp_internal(env, r, a, b, 1);
754 }
755
756 #define VCT(suffix, satcvt, element) \
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
759 { \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
763 \
764 set_float_rounding_mode(float_round_to_zero, &s); \
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
767 r->element[i] = 0; \
768 } else { \
769 float64 t = float32_to_float64(b->f32[i], &s); \
770 int64_t j; \
771 \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
775 } \
776 } \
777 if (sat) { \
778 set_vscr_sat(env); \
779 } \
780 }
781 VCT(uxs, cvtsduw, u32)
782 VCT(sxs, cvtsdsw, s32)
783 #undef VCT
784
785 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
786
787 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
788 {
789 int64_t psum = 0;
790 for (int i = 0; i < 8; i++, mask >>= 1) {
791 if (mask & 1) {
792 psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
793 }
794 }
795 return psum;
796 }
797
798 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
799 {
800 int64_t psum = 0;
801 for (int i = 0; i < 4; i++, mask >>= 1) {
802 if (mask & 1) {
803 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
804 }
805 }
806 return psum;
807 }
808
809 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
810 {
811 int64_t psum = 0;
812 for (int i = 0; i < 2; i++, mask >>= 1) {
813 if (mask & 1) {
814 psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16);
815 }
816 }
817 return psum;
818 }
819
820 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
821 uint32_t mask, bool sat, bool acc, do_ger ger)
822 {
823 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
824 xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
825 ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
826 uint8_t xmsk_bit, ymsk_bit;
827 int64_t psum;
828 int i, j;
829 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
830 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
831 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
832 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
833 if (acc) {
834 psum += at[i].VsrSW(j);
835 }
836 if (sat && psum > INT32_MAX) {
837 set_vscr_sat(env);
838 at[i].VsrSW(j) = INT32_MAX;
839 } else if (sat && psum < INT32_MIN) {
840 set_vscr_sat(env);
841 at[i].VsrSW(j) = INT32_MIN;
842 } else {
843 at[i].VsrSW(j) = (int32_t) psum;
844 }
845 } else {
846 at[i].VsrSW(j) = 0;
847 }
848 }
849 }
850 }
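
/*
 * For each accumulator element selected by xmsk/ymsk, the rank function
 * forms a masked dot product of VSR word a->VsrW(i) with b->VsrW(j), e.g.
 * ger_rank2(0x00020003, 0x00040005, 0b11) = 2 * 4 + 3 * 5 = 23. The 'acc'
 * variants add the previous accumulator value, and the 'sat' variants clamp
 * the sum to the int32_t range, setting VSCR[SAT] on saturation.
 */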
851
852 QEMU_FLATTEN
853 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
854 ppc_acc_t *at, uint32_t mask)
855 {
856 xviger(env, a, b, at, mask, false, false, ger_rank8);
857 }
858
859 QEMU_FLATTEN
860 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
861 ppc_acc_t *at, uint32_t mask)
862 {
863 xviger(env, a, b, at, mask, false, true, ger_rank8);
864 }
865
866 QEMU_FLATTEN
867 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
868 ppc_acc_t *at, uint32_t mask)
869 {
870 xviger(env, a, b, at, mask, false, false, ger_rank4);
871 }
872
873 QEMU_FLATTEN
874 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
875 ppc_acc_t *at, uint32_t mask)
876 {
877 xviger(env, a, b, at, mask, false, true, ger_rank4);
878 }
879
880 QEMU_FLATTEN
881 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
882 ppc_acc_t *at, uint32_t mask)
883 {
884 xviger(env, a, b, at, mask, true, true, ger_rank4);
885 }
886
887 QEMU_FLATTEN
888 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
889 ppc_acc_t *at, uint32_t mask)
890 {
891 xviger(env, a, b, at, mask, false, false, ger_rank2);
892 }
893
894 QEMU_FLATTEN
895 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
896 ppc_acc_t *at, uint32_t mask)
897 {
898 xviger(env, a, b, at, mask, true, false, ger_rank2);
899 }
900
901 QEMU_FLATTEN
902 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
903 ppc_acc_t *at, uint32_t mask)
904 {
905 xviger(env, a, b, at, mask, false, true, ger_rank2);
906 }
907
908 QEMU_FLATTEN
909 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
910 ppc_acc_t *at, uint32_t mask)
911 {
912 xviger(env, a, b, at, mask, true, true, ger_rank2);
913 }
914
915 target_ulong helper_vclzlsbb(ppc_avr_t *r)
916 {
917 target_ulong count = 0;
918 int i;
919 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
920 if (r->VsrB(i) & 0x01) {
921 break;
922 }
923 count++;
924 }
925 return count;
926 }
927
928 target_ulong helper_vctzlsbb(ppc_avr_t *r)
929 {
930 target_ulong count = 0;
931 int i;
932 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
933 if (r->VsrB(i) & 0x01) {
934 break;
935 }
936 count++;
937 }
938 return count;
939 }
940
941 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
942 ppc_avr_t *b, ppc_avr_t *c)
943 {
944 int sat = 0;
945 int i;
946
947 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
948 int32_t prod = a->s16[i] * b->s16[i];
949 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
950
951 r->s16[i] = cvtswsh(t, &sat);
952 }
953
954 if (sat) {
955 set_vscr_sat(env);
956 }
957 }
958
959 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
960 ppc_avr_t *b, ppc_avr_t *c)
961 {
962 int sat = 0;
963 int i;
964
965 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
966 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
967 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
968 r->s16[i] = cvtswsh(t, &sat);
969 }
970
971 if (sat) {
972 set_vscr_sat(env);
973 }
974 }
975
976 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
977 {
978 int i;
979
980 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
981 int32_t prod = a->s16[i] * b->s16[i];
982 r->s16[i] = (int16_t) (prod + c->s16[i]);
983 }
984 }
985
986 #define VMRG_DO(name, element, access, ofs) \
987 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
988 { \
989 ppc_avr_t result; \
990 int i, half = ARRAY_SIZE(r->element) / 2; \
991 \
992 for (i = 0; i < half; i++) { \
993 result.access(i * 2 + 0) = a->access(i + ofs); \
994 result.access(i * 2 + 1) = b->access(i + ofs); \
995 } \
996 *r = result; \
997 }
998
999 #define VMRG(suffix, element, access) \
1000 VMRG_DO(mrgl##suffix, element, access, half) \
1001 VMRG_DO(mrgh##suffix, element, access, 0)
1002 VMRG(b, u8, VsrB)
1003 VMRG(h, u16, VsrH)
1004 VMRG(w, u32, VsrW)
1005 #undef VMRG_DO
1006 #undef VMRG
1007
1008 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1009 {
1010 int32_t prod[16];
1011 int i;
1012
1013 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1014 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1015 }
1016
1017 VECTOR_FOR_INORDER_I(i, s32) {
1018 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1019 prod[4 * i + 2] + prod[4 * i + 3];
1020 }
1021 }
1022
1023 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1024 {
1025 int32_t prod[8];
1026 int i;
1027
1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1029 prod[i] = a->s16[i] * b->s16[i];
1030 }
1031
1032 VECTOR_FOR_INORDER_I(i, s32) {
1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1034 }
1035 }
1036
1037 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1038 ppc_avr_t *b, ppc_avr_t *c)
1039 {
1040 int32_t prod[8];
1041 int i;
1042 int sat = 0;
1043
1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1045 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1046 }
1047
1048 VECTOR_FOR_INORDER_I(i, s32) {
1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1050
1051 r->u32[i] = cvtsdsw(t, &sat);
1052 }
1053
1054 if (sat) {
1055 set_vscr_sat(env);
1056 }
1057 }
1058
1059 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1060 {
1061 uint16_t prod[16];
1062 int i;
1063
1064 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1065 prod[i] = a->u8[i] * b->u8[i];
1066 }
1067
1068 VECTOR_FOR_INORDER_I(i, u32) {
1069 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1070 prod[4 * i + 2] + prod[4 * i + 3];
1071 }
1072 }
1073
1074 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1075 {
1076 uint32_t prod[8];
1077 int i;
1078
1079 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1080 prod[i] = a->u16[i] * b->u16[i];
1081 }
1082
1083 VECTOR_FOR_INORDER_I(i, u32) {
1084 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1085 }
1086 }
1087
1088 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1089 ppc_avr_t *b, ppc_avr_t *c)
1090 {
1091 uint32_t prod[8];
1092 int i;
1093 int sat = 0;
1094
1095 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1096 prod[i] = a->u16[i] * b->u16[i];
1097 }
1098
1099 VECTOR_FOR_INORDER_I(i, s32) {
1100 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1101
1102 r->u32[i] = cvtuduw(t, &sat);
1103 }
1104
1105 if (sat) {
1106 set_vscr_sat(env);
1107 }
1108 }
1109
1110 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1111 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1112 { \
1113 int i; \
1114 \
1115 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1116 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1117 (cast)b->mul_access(i); \
1118 } \
1119 }
1120
1121 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1122 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1123 { \
1124 int i; \
1125 \
1126 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1127 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1128 (cast)b->mul_access(i + 1); \
1129 } \
1130 }
1131
1132 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1133 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1134 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1135 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1136 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1137 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1138 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1139 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1140 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1141 #undef VMUL_DO_EVN
1142 #undef VMUL_DO_ODD
1143 #undef VMUL
1144
1145 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1146 target_ulong uim)
1147 {
1148 int i, idx;
1149 ppc_vsr_t tmp = { .u64 = {0, 0} };
1150
1151 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1152 if ((pcv->VsrB(i) >> 5) == uim) {
1153 idx = pcv->VsrB(i) & 0x1f;
1154 if (idx < ARRAY_SIZE(t->u8)) {
1155 tmp.VsrB(i) = s0->VsrB(idx);
1156 } else {
1157 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1158 }
1159 }
1160 }
1161
1162 *t = tmp;
1163 }
1164
1165 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1166 {
1167 ppc_avr_t result;
1168 int i;
1169
1170 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1171 int s = c->VsrB(i) & 0x1f;
1172 int index = s & 0xf;
1173
1174 if (s & 0x10) {
1175 result.VsrB(i) = b->VsrB(index);
1176 } else {
1177 result.VsrB(i) = a->VsrB(index);
1178 }
1179 }
1180 *r = result;
1181 }
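
/*
 * Each control byte of 'c' selects one source byte, e.g. a control byte of
 * 0x03 picks a->VsrB(3) and 0x13 picks b->VsrB(3); only the low five bits
 * of the control byte are used.
 */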
1182
1183 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1184 {
1185 ppc_avr_t result;
1186 int i;
1187
1188 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1189 int s = c->VsrB(i) & 0x1f;
1190 int index = 15 - (s & 0xf);
1191
1192 if (s & 0x10) {
1193 result.VsrB(i) = a->VsrB(index);
1194 } else {
1195 result.VsrB(i) = b->VsrB(index);
1196 }
1197 }
1198 *r = result;
1199 }
1200
1201 #define XXGENPCV_BE_EXP(NAME, SZ) \
1202 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1203 { \
1204 ppc_vsr_t tmp; \
1205 \
1206 /* Initialize tmp with the result of an all-zeros mask */ \
1207 tmp.VsrD(0) = 0x1011121314151617; \
1208 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1209 \
1210 /* Iterate over the most significant byte of each element */ \
1211 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1212 if (b->VsrB(i) & 0x80) { \
1213 /* Update each byte of the element */ \
1214 for (int k = 0; k < SZ; k++) { \
1215 tmp.VsrB(i + k) = j + k; \
1216 } \
1217 j += SZ; \
1218 } \
1219 } \
1220 \
1221 *t = tmp; \
1222 }
1223
1224 #define XXGENPCV_BE_COMP(NAME, SZ) \
1225 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1226 { \
1227 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1228 \
1229 /* Iterate over the most significant byte of each element */ \
1230 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1231 if (b->VsrB(i) & 0x80) { \
1232 /* Update each byte of the element */ \
1233 for (int k = 0; k < SZ; k++) { \
1234 tmp.VsrB(j + k) = i + k; \
1235 } \
1236 j += SZ; \
1237 } \
1238 } \
1239 \
1240 *t = tmp; \
1241 }
1242
1243 #define XXGENPCV_LE_EXP(NAME, SZ) \
1244 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1245 { \
1246 ppc_vsr_t tmp; \
1247 \
1248 /* Initialize tmp with the result of an all-zeros mask */ \
1249 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1250 tmp.VsrD(1) = 0x1716151413121110; \
1251 \
1252 /* Iterate over the most significant byte of each element */ \
1253 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1254 /* Reverse indexing of "i" */ \
1255 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1256 if (b->VsrB(idx) & 0x80) { \
1257 /* Update each byte of the element */ \
1258 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1259 tmp.VsrB(idx + rk) = j + k; \
1260 } \
1261 j += SZ; \
1262 } \
1263 } \
1264 \
1265 *t = tmp; \
1266 }
1267
1268 #define XXGENPCV_LE_COMP(NAME, SZ) \
1269 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1270 { \
1271 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1272 \
1273 /* Iterate over the most significant byte of each element */ \
1274 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1275 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1276 /* Update each byte of the element */ \
1277 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1278 /* Reverse indexing of "j" */ \
1279 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1280 tmp.VsrB(idx + rk) = i + k; \
1281 } \
1282 j += SZ; \
1283 } \
1284 } \
1285 \
1286 *t = tmp; \
1287 }
1288
1289 #define XXGENPCV(NAME, SZ) \
1290 XXGENPCV_BE_EXP(NAME, SZ) \
1291 XXGENPCV_BE_COMP(NAME, SZ) \
1292 XXGENPCV_LE_EXP(NAME, SZ) \
1293 XXGENPCV_LE_COMP(NAME, SZ) \
1294
1295 XXGENPCV(XXGENPCVBM, 1)
1296 XXGENPCV(XXGENPCVHM, 2)
1297 XXGENPCV(XXGENPCVWM, 4)
1298 XXGENPCV(XXGENPCVDM, 8)
1299
1300 #undef XXGENPCV_BE_EXP
1301 #undef XXGENPCV_BE_COMP
1302 #undef XXGENPCV_LE_EXP
1303 #undef XXGENPCV_LE_COMP
1304 #undef XXGENPCV
1305
1306 #if HOST_BIG_ENDIAN
1307 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1308 #define VBPERMD_INDEX(i) (i)
1309 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1310 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1311 #else
1312 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1313 #define VBPERMD_INDEX(i) (1 - i)
1314 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1315 #define EXTRACT_BIT(avr, i, index) \
1316 (extract64((avr)->u64[1 - i], 63 - index, 1))
1317 #endif
1318
1319 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1320 {
1321 int i, j;
1322 ppc_avr_t result = { .u64 = { 0, 0 } };
1323 VECTOR_FOR_INORDER_I(i, u64) {
1324 for (j = 0; j < 8; j++) {
1325 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1326 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1327 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1328 }
1329 }
1330 }
1331 *r = result;
1332 }
1333
1334 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1335 {
1336 int i;
1337 uint64_t perm = 0;
1338
1339 VECTOR_FOR_INORDER_I(i, u8) {
1340 int index = VBPERMQ_INDEX(b, i);
1341
1342 if (index < 128) {
1343 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1344 if (a->u64[VBPERMQ_DW(index)] & mask) {
1345 perm |= (0x8000 >> i);
1346 }
1347 }
1348 }
1349
1350 r->VsrD(0) = perm;
1351 r->VsrD(1) = 0;
1352 }
1353
1354 #undef VBPERMQ_INDEX
1355 #undef VBPERMQ_DW
1356
1357 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1358 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1359 { \
1360 int i, j; \
1361 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1362 \
1363 VECTOR_FOR_INORDER_I(i, srcfld) { \
1364 prod[i] = 0; \
1365 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1366 if (a->srcfld[i] & (1ull << j)) { \
1367 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1368 } \
1369 } \
1370 } \
1371 \
1372 VECTOR_FOR_INORDER_I(i, trgfld) { \
1373 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1374 } \
1375 }
1376
1377 PMSUM(vpmsumb, u8, u16, uint16_t)
1378 PMSUM(vpmsumh, u16, u32, uint32_t)
1379 PMSUM(vpmsumw, u32, u64, uint64_t)
1380
1381 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1382 {
1383
1384 #ifdef CONFIG_INT128
1385 int i, j;
1386 __uint128_t prod[2];
1387
1388 VECTOR_FOR_INORDER_I(i, u64) {
1389 prod[i] = 0;
1390 for (j = 0; j < 64; j++) {
1391 if (a->u64[i] & (1ull << j)) {
1392 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1393 }
1394 }
1395 }
1396
1397 r->u128 = prod[0] ^ prod[1];
1398
1399 #else
1400 int i, j;
1401 ppc_avr_t prod[2];
1402
1403 VECTOR_FOR_INORDER_I(i, u64) {
1404 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1405 for (j = 0; j < 64; j++) {
1406 if (a->u64[i] & (1ull << j)) {
1407 ppc_avr_t bshift;
1408 if (j == 0) {
1409 bshift.VsrD(0) = 0;
1410 bshift.VsrD(1) = b->u64[i];
1411 } else {
1412 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1413 bshift.VsrD(1) = b->u64[i] << j;
1414 }
1415 prod[i].VsrD(1) ^= bshift.VsrD(1);
1416 prod[i].VsrD(0) ^= bshift.VsrD(0);
1417 }
1418 }
1419 }
1420
1421 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1422 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1423 #endif
1424 }
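
/*
 * Each doubleword pair is multiplied carry-less (XOR of shifted copies),
 * e.g. with a->u64[i] = b->u64[i] = 0x3 the partial product is
 * 0x3 ^ (0x3 << 1) = 0x5, i.e. (x + 1)^2 = x^2 + 1 over GF(2); the two
 * 128-bit partial products are then XORed into the result.
 */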
1425
1426
1427 #if HOST_BIG_ENDIAN
1428 #define PKBIG 1
1429 #else
1430 #define PKBIG 0
1431 #endif
1432 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1433 {
1434 int i, j;
1435 ppc_avr_t result;
1436 #if HOST_BIG_ENDIAN
1437 const ppc_avr_t *x[2] = { a, b };
1438 #else
1439 const ppc_avr_t *x[2] = { b, a };
1440 #endif
1441
1442 VECTOR_FOR_INORDER_I(i, u64) {
1443 VECTOR_FOR_INORDER_I(j, u32) {
1444 uint32_t e = x[i]->u32[j];
1445
1446 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1447 ((e >> 6) & 0x3e0) |
1448 ((e >> 3) & 0x1f));
1449 }
1450 }
1451 *r = result;
1452 }
1453
1454 #define VPK(suffix, from, to, cvt, dosat) \
1455 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1456 ppc_avr_t *a, ppc_avr_t *b) \
1457 { \
1458 int i; \
1459 int sat = 0; \
1460 ppc_avr_t result; \
1461 ppc_avr_t *a0 = PKBIG ? a : b; \
1462 ppc_avr_t *a1 = PKBIG ? b : a; \
1463 \
1464 VECTOR_FOR_INORDER_I(i, from) { \
1465 result.to[i] = cvt(a0->from[i], &sat); \
1466 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1467 } \
1468 *r = result; \
1469 if (dosat && sat) { \
1470 set_vscr_sat(env); \
1471 } \
1472 }
1473 #define I(x, y) (x)
1474 VPK(shss, s16, s8, cvtshsb, 1)
1475 VPK(shus, s16, u8, cvtshub, 1)
1476 VPK(swss, s32, s16, cvtswsh, 1)
1477 VPK(swus, s32, u16, cvtswuh, 1)
1478 VPK(sdss, s64, s32, cvtsdsw, 1)
1479 VPK(sdus, s64, u32, cvtsduw, 1)
1480 VPK(uhus, u16, u8, cvtuhub, 1)
1481 VPK(uwus, u32, u16, cvtuwuh, 1)
1482 VPK(udus, u64, u32, cvtuduw, 1)
1483 VPK(uhum, u16, u8, I, 0)
1484 VPK(uwum, u32, u16, I, 0)
1485 VPK(udum, u64, u32, I, 0)
1486 #undef I
1487 #undef VPK
1488 #undef PKBIG
1489
1490 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1491 {
1492 int i;
1493
1494 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1495 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1496 }
1497 }
1498
1499 #define VRFI(suffix, rounding) \
1500 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1501 ppc_avr_t *b) \
1502 { \
1503 int i; \
1504 float_status s = env->vec_status; \
1505 \
1506 set_float_rounding_mode(rounding, &s); \
1507 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1508 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1509 } \
1510 }
1511 VRFI(n, float_round_nearest_even)
1512 VRFI(m, float_round_down)
1513 VRFI(p, float_round_up)
1514 VRFI(z, float_round_to_zero)
1515 #undef VRFI
1516
1517 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1518 {
1519 int i;
1520
1521 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1522 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1523
1524 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1525 }
1526 }
1527
1528 #define VRLMI(name, size, element, insert) \
1529 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1530 { \
1531 int i; \
1532 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1533 uint##size##_t src1 = a->element[i]; \
1534 uint##size##_t src2 = b->element[i]; \
1535 uint##size##_t src3 = r->element[i]; \
1536 uint##size##_t begin, end, shift, mask, rot_val; \
1537 \
1538 shift = extract##size(src2, 0, 6); \
1539 end = extract##size(src2, 8, 6); \
1540 begin = extract##size(src2, 16, 6); \
1541 rot_val = rol##size(src1, shift); \
1542 mask = mask_u##size(begin, end); \
1543 if (insert) { \
1544 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1545 } else { \
1546 r->element[i] = (rot_val & mask); \
1547 } \
1548 } \
1549 }
1550
1551 VRLMI(VRLDMI, 64, u64, 1);
1552 VRLMI(VRLWMI, 32, u32, 1);
1553 VRLMI(VRLDNM, 64, u64, 0);
1554 VRLMI(VRLWNM, 32, u32, 0);
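
/*
 * The second source packs the rotate/mask controls: the rotate amount is in
 * bits 0..5, the mask end in bits 8..13 and the mask begin in bits 16..21.
 * The first source is rotated left and either inserted under the mask
 * (VRLDMI/VRLWMI) or simply masked (VRLDNM/VRLWNM).
 */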
1555
1556 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1557 {
1558 int i;
1559
1560 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1561 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1562 }
1563 }
1564
1565 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1566 {
1567 int i;
1568
1569 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1570 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1571 }
1572 }
1573
1574 #define VEXTU_X_DO(name, size, left) \
1575 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1576 { \
1577 int index = (a & 0xf) * 8; \
1578 if (left) { \
1579 index = 128 - index - size; \
1580 } \
1581 return int128_getlo(int128_rshift(b->s128, index)) & \
1582 MAKE_64BIT_MASK(0, size); \
1583 }
1584 VEXTU_X_DO(vextublx, 8, 1)
1585 VEXTU_X_DO(vextuhlx, 16, 1)
1586 VEXTU_X_DO(vextuwlx, 32, 1)
1587 VEXTU_X_DO(vextubrx, 8, 0)
1588 VEXTU_X_DO(vextuhrx, 16, 0)
1589 VEXTU_X_DO(vextuwrx, 32, 0)
1590 #undef VEXTU_X_DO
1591
1592 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1593 {
1594 int i;
1595 unsigned int shift, bytes, size;
1596
1597 size = ARRAY_SIZE(r->u8);
1598 for (i = 0; i < size; i++) {
1599 shift = b->VsrB(i) & 0x7; /* extract shift value */
1600 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1601 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1602 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1603 }
1604 }
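
/*
 * Each byte is shifted left by its per-byte shift count, with the vacated
 * low bits filled from the next byte, e.g. a byte 0x01 followed by 0x80
 * and a shift count of 1 produces 0x03.
 */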
1605
1606 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1607 {
1608 int i;
1609 unsigned int shift, bytes;
1610
1611 /*
1612 * Use reverse order, as the destination and source registers can be
1613 * the same. The register is modified in place, saving a temporary;
1614 * reverse order guarantees that the computed result is not fed back.
1615 */
1616 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1617 shift = b->VsrB(i) & 0x7; /* extract shift value */
1618 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1619 /* extract adjacent bytes */
1620 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1621 }
1622 }
1623
1624 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1625 {
1626 int sh = shift & 0xf;
1627 int i;
1628 ppc_avr_t result;
1629
1630 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1631 int index = sh + i;
1632 if (index > 0xf) {
1633 result.VsrB(i) = b->VsrB(index - 0x10);
1634 } else {
1635 result.VsrB(i) = a->VsrB(index);
1636 }
1637 }
1638 *r = result;
1639 }
1640
1641 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1642 {
1643 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1644
1645 #if HOST_BIG_ENDIAN
1646 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1647 memset(&r->u8[16 - sh], 0, sh);
1648 #else
1649 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1650 memset(&r->u8[0], 0, sh);
1651 #endif
1652 }
1653
1654 #if HOST_BIG_ENDIAN
1655 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1656 #else
1657 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1658 #endif
1659
1660 #define VINSX(SUFFIX, TYPE) \
1661 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1662 uint64_t val, target_ulong index) \
1663 { \
1664 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1665 target_long idx = index; \
1666 \
1667 if (idx < 0 || idx > maxidx) { \
1668 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1669 qemu_log_mask(LOG_GUEST_ERROR, \
1670 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1671 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1672 } else { \
1673 TYPE src = val; \
1674 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1675 } \
1676 }
1677 VINSX(B, uint8_t)
1678 VINSX(H, uint16_t)
1679 VINSX(W, uint32_t)
1680 VINSX(D, uint64_t)
1681 #undef ELEM_ADDR
1682 #undef VINSX
1683 #if HOST_BIG_ENDIAN
1684 #define VEXTDVLX(NAME, SIZE) \
1685 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1686 target_ulong index) \
1687 { \
1688 const target_long idx = index; \
1689 ppc_avr_t tmp[2] = { *a, *b }; \
1690 memset(t, 0, sizeof(*t)); \
1691 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1692 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1693 } else { \
1694 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1695 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1696 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1697 } \
1698 }
1699 #else
1700 #define VEXTDVLX(NAME, SIZE) \
1701 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1702 target_ulong index) \
1703 { \
1704 const target_long idx = index; \
1705 ppc_avr_t tmp[2] = { *b, *a }; \
1706 memset(t, 0, sizeof(*t)); \
1707 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1708 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1709 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1710 } else { \
1711 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1712 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1713 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1714 } \
1715 }
1716 #endif
1717 VEXTDVLX(VEXTDUBVLX, 1)
1718 VEXTDVLX(VEXTDUHVLX, 2)
1719 VEXTDVLX(VEXTDUWVLX, 4)
1720 VEXTDVLX(VEXTDDVLX, 8)
1721 #undef VEXTDVLX
1722 #if HOST_BIG_ENDIAN
1723 #define VEXTRACT(suffix, element) \
1724 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1725 { \
1726 uint32_t es = sizeof(r->element[0]); \
1727 memmove(&r->u8[8 - es], &b->u8[index], es); \
1728 memset(&r->u8[8], 0, 8); \
1729 memset(&r->u8[0], 0, 8 - es); \
1730 }
1731 #else
1732 #define VEXTRACT(suffix, element) \
1733 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1734 { \
1735 uint32_t es = sizeof(r->element[0]); \
1736 uint32_t s = (16 - index) - es; \
1737 memmove(&r->u8[8], &b->u8[s], es); \
1738 memset(&r->u8[0], 0, 8); \
1739 memset(&r->u8[8 + es], 0, 8 - es); \
1740 }
1741 #endif
1742 VEXTRACT(ub, u8)
1743 VEXTRACT(uh, u16)
1744 VEXTRACT(uw, u32)
1745 VEXTRACT(d, u64)
1746 #undef VEXTRACT
1747
1748 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1749 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1750 { \
1751 int i, idx, crf = 0; \
1752 \
1753 for (i = 0; i < NUM_ELEMS; i++) { \
1754 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1755 if (b->Vsr##ELEM(idx)) { \
1756 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1757 } else { \
1758 crf = 0b0010; \
1759 break; \
1760 } \
1761 } \
1762 \
1763 for (; i < NUM_ELEMS; i++) { \
1764 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1765 t->Vsr##ELEM(idx) = 0; \
1766 } \
1767 \
1768 return crf; \
1769 }
1770 VSTRI(VSTRIBL, B, 16, true)
1771 VSTRI(VSTRIBR, B, 16, false)
1772 VSTRI(VSTRIHL, H, 8, true)
1773 VSTRI(VSTRIHR, H, 8, false)
1774 #undef VSTRI
1775
1776 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1777 {
1778 ppc_vsr_t t = { };
1779 size_t es = sizeof(uint32_t);
1780 uint32_t ext_index;
1781 int i;
1782
1783 ext_index = index;
1784 for (i = 0; i < es; i++, ext_index++) {
1785 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1786 }
1787
1788 *xt = t;
1789 }
1790
1791 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1792 {
1793 ppc_vsr_t t = *xt;
1794 size_t es = sizeof(uint32_t);
1795 int ins_index, i = 0;
1796
1797 ins_index = index;
1798 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1799 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1800 }
1801
1802 *xt = t;
1803 }
1804
1805 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1806 uint32_t desc)
1807 {
1808 /*
1809 * Instead of processing imm bit-by-bit, we'll skip the computation of
1810 * conjunctions whose corresponding bit is unset.
1811 */
1812 int bit, imm = simd_data(desc);
1813 Int128 conj, disj = int128_zero();
1814
1815 /* Iterate over set bits from the least to the most significant bit */
1816 while (imm) {
1817 /*
1818 * Get the next set bit to be processed with ctzl and subtract it from
1819 * 7 to match the bit indexing used by PowerISA.
1820 */
1821 bit = 7 - ctzl(imm);
1822 if (bit & 0x4) {
1823 conj = a->s128;
1824 } else {
1825 conj = int128_not(a->s128);
1826 }
1827 if (bit & 0x2) {
1828 conj = int128_and(conj, b->s128);
1829 } else {
1830 conj = int128_and(conj, int128_not(b->s128));
1831 }
1832 if (bit & 0x1) {
1833 conj = int128_and(conj, c->s128);
1834 } else {
1835 conj = int128_and(conj, int128_not(c->s128));
1836 }
1837 disj = int128_or(disj, conj);
1838
1839 /* Unset the least significant bit that is set */
1840 imm &= imm - 1;
1841 }
1842
1843 t->s128 = disj;
1844 }
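
/*
 * Bit k of imm (counting from the least significant bit) selects the
 * conjunction indexed 7 - k, so imm = 0x01 yields A & B & C, imm = 0x80
 * yields ~A & ~B & ~C, and imm = 0xFE ORs every conjunction except
 * A & B & C, i.e. ~(A & B & C).
 */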
1845
1846 #define XXBLEND(name, sz) \
1847 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1848 ppc_avr_t *c, uint32_t desc) \
1849 { \
1850 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1851 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1852 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1853 } \
1854 }
1855 XXBLEND(B, 8)
1856 XXBLEND(H, 16)
1857 XXBLEND(W, 32)
1858 XXBLEND(D, 64)
1859 #undef XXBLEND
1860
1861 #define VNEG(name, element) \
1862 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1863 { \
1864 int i; \
1865 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1866 r->element[i] = -b->element[i]; \
1867 } \
1868 }
1869 VNEG(vnegw, s32)
1870 VNEG(vnegd, s64)
1871 #undef VNEG
1872
1873 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1874 {
1875 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1876
1877 #if HOST_BIG_ENDIAN
1878 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1879 memset(&r->u8[0], 0, sh);
1880 #else
1881 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1882 memset(&r->u8[16 - sh], 0, sh);
1883 #endif
1884 }
1885
1886 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1887 {
1888 int i;
1889
1890 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1891 r->u32[i] = a->u32[i] >= b->u32[i];
1892 }
1893 }
1894
1895 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1896 {
1897 int64_t t;
1898 int i, upper;
1899 ppc_avr_t result;
1900 int sat = 0;
1901
1902 upper = ARRAY_SIZE(r->s32) - 1;
1903 t = (int64_t)b->VsrSW(upper);
1904 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1905 t += a->VsrSW(i);
1906 result.VsrSW(i) = 0;
1907 }
1908 result.VsrSW(upper) = cvtsdsw(t, &sat);
1909 *r = result;
1910
1911 if (sat) {
1912 set_vscr_sat(env);
1913 }
1914 }
1915
1916 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1917 {
1918 int i, j, upper;
1919 ppc_avr_t result;
1920 int sat = 0;
1921
1922 upper = 1;
1923 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1924 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1925
1926 result.VsrD(i) = 0;
1927 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1928 t += a->VsrSW(2 * i + j);
1929 }
1930 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1931 }
1932
1933 *r = result;
1934 if (sat) {
1935 set_vscr_sat(env);
1936 }
1937 }
1938
1939 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1940 {
1941 int i, j;
1942 int sat = 0;
1943
1944 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1945 int64_t t = (int64_t)b->s32[i];
1946
1947 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1948 t += a->s8[4 * i + j];
1949 }
1950 r->s32[i] = cvtsdsw(t, &sat);
1951 }
1952
1953 if (sat) {
1954 set_vscr_sat(env);
1955 }
1956 }
1957
1958 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1959 {
1960 int sat = 0;
1961 int i;
1962
1963 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1964 int64_t t = (int64_t)b->s32[i];
1965
1966 t += a->s16[2 * i] + a->s16[2 * i + 1];
1967 r->s32[i] = cvtsdsw(t, &sat);
1968 }
1969
1970 if (sat) {
1971 set_vscr_sat(env);
1972 }
1973 }
1974
1975 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1976 {
1977 int i, j;
1978 int sat = 0;
1979
1980 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1981 uint64_t t = (uint64_t)b->u32[i];
1982
1983 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1984 t += a->u8[4 * i + j];
1985 }
1986 r->u32[i] = cvtuduw(t, &sat);
1987 }
1988
1989 if (sat) {
1990 set_vscr_sat(env);
1991 }
1992 }
1993
1994 #if HOST_BIG_ENDIAN
1995 #define UPKHI 1
1996 #define UPKLO 0
1997 #else
1998 #define UPKHI 0
1999 #define UPKLO 1
2000 #endif
2001 #define VUPKPX(suffix, hi) \
2002 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2003 { \
2004 int i; \
2005 ppc_avr_t result; \
2006 \
2007 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2008 uint16_t e = b->u16[hi ? i : i + 4]; \
2009 uint8_t a = (e >> 15) ? 0xff : 0; \
2010 uint8_t r = (e >> 10) & 0x1f; \
2011 uint8_t g = (e >> 5) & 0x1f; \
2012 uint8_t b = e & 0x1f; \
2013 \
2014 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2015 } \
2016 *r = result; \
2017 }
2018 VUPKPX(lpx, UPKLO)
2019 VUPKPX(hpx, UPKHI)
2020 #undef VUPKPX
2021
2022 #define VUPK(suffix, unpacked, packee, hi) \
2023 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2024 { \
2025 int i; \
2026 ppc_avr_t result; \
2027 \
2028 if (hi) { \
2029 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2030 result.unpacked[i] = b->packee[i]; \
2031 } \
2032 } else { \
2033 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2034 i++) { \
2035 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2036 } \
2037 } \
2038 *r = result; \
2039 }
2040 VUPK(hsb, s16, s8, UPKHI)
2041 VUPK(hsh, s32, s16, UPKHI)
2042 VUPK(hsw, s64, s32, UPKHI)
2043 VUPK(lsb, s16, s8, UPKLO)
2044 VUPK(lsh, s32, s16, UPKLO)
2045 VUPK(lsw, s64, s32, UPKLO)
2046 #undef VUPK
2047 #undef UPKHI
2048 #undef UPKLO
2049
2050 #define VGENERIC_DO(name, element) \
2051 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2052 { \
2053 int i; \
2054 \
2055 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2056 r->element[i] = name(b->element[i]); \
2057 } \
2058 }
2059
2060 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2061 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2062
2063 VGENERIC_DO(clzb, u8)
2064 VGENERIC_DO(clzh, u16)
2065
2066 #undef clzb
2067 #undef clzh
2068
2069 #define ctzb(v) ((v) ? ctz32(v) : 8)
2070 #define ctzh(v) ((v) ? ctz32(v) : 16)
2071 #define ctzw(v) ctz32((v))
2072 #define ctzd(v) ctz64((v))
2073
2074 VGENERIC_DO(ctzb, u8)
2075 VGENERIC_DO(ctzh, u16)
2076 VGENERIC_DO(ctzw, u32)
2077 VGENERIC_DO(ctzd, u64)
2078
2079 #undef ctzb
2080 #undef ctzh
2081 #undef ctzw
2082 #undef ctzd
2083
2084 #define popcntb(v) ctpop8(v)
2085 #define popcnth(v) ctpop16(v)
2086 #define popcntw(v) ctpop32(v)
2087 #define popcntd(v) ctpop64(v)
2088
2089 VGENERIC_DO(popcntb, u8)
2090 VGENERIC_DO(popcnth, u16)
2091 VGENERIC_DO(popcntw, u32)
2092 VGENERIC_DO(popcntd, u64)
2093
2094 #undef popcntb
2095 #undef popcnth
2096 #undef popcntw
2097 #undef popcntd
2098
2099 #undef VGENERIC_DO
2100
2101 #if HOST_BIG_ENDIAN
2102 #define QW_ONE { .u64 = { 0, 1 } }
2103 #else
2104 #define QW_ONE { .u64 = { 1, 0 } }
2105 #endif
2106
2107 #ifndef CONFIG_INT128
2108
2109 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2110 {
2111 t->u64[0] = ~a.u64[0];
2112 t->u64[1] = ~a.u64[1];
2113 }
2114
2115 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2116 {
2117 if (a.VsrD(0) < b.VsrD(0)) {
2118 return -1;
2119 } else if (a.VsrD(0) > b.VsrD(0)) {
2120 return 1;
2121 } else if (a.VsrD(1) < b.VsrD(1)) {
2122 return -1;
2123 } else if (a.VsrD(1) > b.VsrD(1)) {
2124 return 1;
2125 } else {
2126 return 0;
2127 }
2128 }
2129
2130 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2131 {
2132 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2133 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2134 (~a.VsrD(1) < b.VsrD(1));
2135 }
2136
2137 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2138 {
2139 ppc_avr_t not_a;
2140 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2141 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2142 (~a.VsrD(1) < b.VsrD(1));
2143 avr_qw_not(&not_a, a);
2144 return avr_qw_cmpu(not_a, b) < 0;
2145 }
2146
2147 #endif
2148
2149 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2150 {
2151 #ifdef CONFIG_INT128
2152 r->u128 = a->u128 + b->u128;
2153 #else
2154 avr_qw_add(r, *a, *b);
2155 #endif
2156 }
2157
2158 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2159 {
2160 #ifdef CONFIG_INT128
2161 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2162 #else
2163
2164 if (c->VsrD(1) & 1) {
2165 ppc_avr_t tmp;
2166
2167 tmp.VsrD(0) = 0;
2168 tmp.VsrD(1) = c->VsrD(1) & 1;
2169 avr_qw_add(&tmp, *a, tmp);
2170 avr_qw_add(r, tmp, *b);
2171 } else {
2172 avr_qw_add(r, *a, *b);
2173 }
2174 #endif
2175 }
2176
2177 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2178 {
2179 #ifdef CONFIG_INT128
2180 r->u128 = (~a->u128 < b->u128);
2181 #else
2182 ppc_avr_t not_a;
2183
2184 avr_qw_not(&not_a, *a);
2185
2186 r->VsrD(0) = 0;
2187 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2188 #endif
2189 }
2190
2191 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2192 {
2193 #ifdef CONFIG_INT128
2194 int carry_out = (~a->u128 < b->u128);
2195 if (!carry_out && (c->u128 & 1)) {
2196 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2197 ((a->u128 != 0) || (b->u128 != 0));
2198 }
2199 r->u128 = carry_out;
2200 #else
2201
2202 int carry_in = c->VsrD(1) & 1;
2203 int carry_out = 0;
2204 ppc_avr_t tmp;
2205
2206 carry_out = avr_qw_addc(&tmp, *a, *b);
2207
2208 if (!carry_out && carry_in) {
2209 ppc_avr_t one = QW_ONE;
2210 carry_out = avr_qw_addc(&tmp, tmp, one);
2211 }
2212 r->VsrD(0) = 0;
2213 r->VsrD(1) = carry_out;
2214 #endif
2215 }
2216
2217 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2218 {
2219 #ifdef CONFIG_INT128
2220 r->u128 = a->u128 - b->u128;
2221 #else
2222 ppc_avr_t tmp;
2223 ppc_avr_t one = QW_ONE;
2224
2225 avr_qw_not(&tmp, *b);
2226 avr_qw_add(&tmp, *a, tmp);
2227 avr_qw_add(r, tmp, one);
2228 #endif
2229 }
2230
2231 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2232 {
2233 #ifdef CONFIG_INT128
2234 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2235 #else
2236 ppc_avr_t tmp, sum;
2237
2238 avr_qw_not(&tmp, *b);
2239 avr_qw_add(&sum, *a, tmp);
2240
2241 tmp.VsrD(0) = 0;
2242 tmp.VsrD(1) = c->VsrD(1) & 1;
2243 avr_qw_add(r, sum, tmp);
2244 #endif
2245 }
2246
2247 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2248 {
2249 #ifdef CONFIG_INT128
2250 r->u128 = (~a->u128 < ~b->u128) ||
2251 (a->u128 + ~b->u128 == (__uint128_t)-1);
2252 #else
2253 int carry = (avr_qw_cmpu(*a, *b) > 0);
2254 if (!carry) {
2255 ppc_avr_t tmp;
2256 avr_qw_not(&tmp, *b);
2257 avr_qw_add(&tmp, *a, tmp);
2258 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2259 }
2260 r->VsrD(0) = 0;
2261 r->VsrD(1) = carry;
2262 #endif
2263 }
2264
2265 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2266 {
2267 #ifdef CONFIG_INT128
2268 r->u128 =
2269 (~a->u128 < ~b->u128) ||
2270 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2271 #else
2272 int carry_in = c->VsrD(1) & 1;
2273 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2274 if (!carry_out && carry_in) {
2275 ppc_avr_t tmp;
2276 avr_qw_not(&tmp, *b);
2277 avr_qw_add(&tmp, *a, tmp);
2278 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2279 }
2280
2281 r->VsrD(0) = 0;
2282 r->VsrD(1) = carry_out;
2283 #endif
2284 }
2285
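/*
 * Signed packed BCD helpers.  A 128-bit BCD operand holds 31 decimal digits
 * plus a sign code in the least-significant nibble; the values below are the
 * preferred and alternate plus/minus sign codes, the '+'/'-' characters used
 * by the national format, and BCD_DIG_BYTE(n) maps digit index n to the byte
 * holding it within the vector register.
 */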
2286 #define BCD_PLUS_PREF_1 0xC
2287 #define BCD_PLUS_PREF_2 0xF
2288 #define BCD_PLUS_ALT_1 0xA
2289 #define BCD_NEG_PREF 0xD
2290 #define BCD_NEG_ALT 0xB
2291 #define BCD_PLUS_ALT_2 0xE
2292 #define NATIONAL_PLUS 0x2B
2293 #define NATIONAL_NEG 0x2D
2294
2295 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2296
2297 static int bcd_get_sgn(ppc_avr_t *bcd)
2298 {
2299 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2300 case BCD_PLUS_PREF_1:
2301 case BCD_PLUS_PREF_2:
2302 case BCD_PLUS_ALT_1:
2303 case BCD_PLUS_ALT_2:
2304 {
2305 return 1;
2306 }
2307
2308 case BCD_NEG_PREF:
2309 case BCD_NEG_ALT:
2310 {
2311 return -1;
2312 }
2313
2314 default:
2315 {
2316 return 0;
2317 }
2318 }
2319 }
2320
2321 static int bcd_preferred_sgn(int sgn, int ps)
2322 {
2323 if (sgn >= 0) {
2324 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2325 } else {
2326 return BCD_NEG_PREF;
2327 }
2328 }
2329
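/*
 * Read or write a single BCD digit: odd-numbered digits sit in the high
 * nibble of their byte and even-numbered digits in the low nibble (for
 * example, digit 5 is the high nibble of VsrB(13)).  A digit value above 9
 * flags the operand as invalid.
 */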
2330 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2331 {
2332 uint8_t result;
2333 if (n & 1) {
2334 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2335 } else {
2336 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2337 }
2338
2339 if (unlikely(result > 9)) {
2340 *invalid = true;
2341 }
2342 return result;
2343 }
2344
2345 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2346 {
2347 if (n & 1) {
2348 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2349 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2350 } else {
2351 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2352 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2353 }
2354 }
2355
2356 static bool bcd_is_valid(ppc_avr_t *bcd)
2357 {
2358 int i;
2359 int invalid = 0;
2360
2361 if (bcd_get_sgn(bcd) == 0) {
2362 return false;
2363 }
2364
2365 for (i = 1; i < 32; i++) {
2366 bcd_get_digit(bcd, i, &invalid);
2367 if (unlikely(invalid)) {
2368 return false;
2369 }
2370 }
2371 return true;
2372 }
2373
2374 static int bcd_cmp_zero(ppc_avr_t *bcd)
2375 {
2376 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2377 return CRF_EQ;
2378 } else {
2379 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2380 }
2381 }
2382
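/*
 * National format: one character per halfword, digit n in VsrH(7 - n), so
 * the sign character ('+' or '-') is the least-significant halfword.
 */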
2383 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2384 {
2385 return reg->VsrH(7 - n);
2386 }
2387
2388 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2389 {
2390 reg->VsrH(7 - n) = val;
2391 }
2392
2393 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2394 {
2395 int i;
2396 int invalid = 0;
2397 for (i = 31; i > 0; i--) {
2398 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2399 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2400 if (unlikely(invalid)) {
2401 return 0; /* doesn't matter */
2402 } else if (dig_a > dig_b) {
2403 return 1;
2404 } else if (dig_a < dig_b) {
2405 return -1;
2406 }
2407 }
2408
2409 return 0;
2410 }
2411
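/*
 * Magnitude-only add/subtract of two BCD operands, one digit at a time with
 * decimal carry/borrow propagation.  bcd_add_mag also reports whether the
 * result is zero and returns a carry out of digit 31 via *overflow.
 */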
2412 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2413 int *overflow)
2414 {
2415 int carry = 0;
2416 int i;
2417 int is_zero = 1;
2418
2419 for (i = 1; i <= 31; i++) {
2420 uint8_t digit = bcd_get_digit(a, i, invalid) +
2421 bcd_get_digit(b, i, invalid) + carry;
2422 is_zero &= (digit == 0);
2423 if (digit > 9) {
2424 carry = 1;
2425 digit -= 10;
2426 } else {
2427 carry = 0;
2428 }
2429
2430 bcd_put_digit(t, digit, i);
2431 }
2432
2433 *overflow = carry;
2434 return is_zero;
2435 }
2436
2437 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2438 int *overflow)
2439 {
2440 int carry = 0;
2441 int i;
2442
2443 for (i = 1; i <= 31; i++) {
2444 uint8_t digit = bcd_get_digit(a, i, invalid) -
2445 bcd_get_digit(b, i, invalid) + carry;
2446 if (digit & 0x80) {
2447 carry = -1;
2448 digit += 10;
2449 } else {
2450 carry = 0;
2451 }
2452
2453 bcd_put_digit(t, digit, i);
2454 }
2455
2456 *overflow = carry;
2457 }
2458
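/*
 * bcdadd.: signed BCD addition.  Operands with the same sign have their
 * magnitudes added; otherwise the smaller magnitude is subtracted from the
 * larger and the result takes the larger operand's sign.  The returned CR
 * field reports negative/positive/zero, with SO on invalid input or
 * decimal overflow.
 */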
2459 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2460 {
2461
2462 int sgna = bcd_get_sgn(a);
2463 int sgnb = bcd_get_sgn(b);
2464 int invalid = (sgna == 0) || (sgnb == 0);
2465 int overflow = 0;
2466 int zero = 0;
2467 uint32_t cr = 0;
2468 ppc_avr_t result = { .u64 = { 0, 0 } };
2469
2470 if (!invalid) {
2471 if (sgna == sgnb) {
2472 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2473 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2474 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2475 } else {
2476 int magnitude = bcd_cmp_mag(a, b);
2477 if (magnitude > 0) {
2478 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2479 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2480 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2481 } else if (magnitude < 0) {
2482 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2483 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2484 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2485 } else {
2486 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2487 cr = CRF_EQ;
2488 }
2489 }
2490 }
2491
2492 if (unlikely(invalid)) {
2493 result.VsrD(0) = result.VsrD(1) = -1;
2494 cr = CRF_SO;
2495 } else if (overflow) {
2496 cr |= CRF_SO;
2497 } else if (zero) {
2498 cr |= CRF_EQ;
2499 }
2500
2501 *r = result;
2502
2503 return cr;
2504 }
2505
2506 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2507 {
2508 ppc_avr_t bcopy = *b;
2509 int sgnb = bcd_get_sgn(b);
2510 if (sgnb < 0) {
2511 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2512 } else if (sgnb > 0) {
2513 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2514 }
2515 /* else invalid ... defer to bcdadd code for proper handling */
2516
2517 return helper_bcdadd(r, a, &bcopy, ps);
2518 }
2519
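/*
 * bcdcfn.: convert from national format.  Every source halfword after the
 * sign must be a '0'-'9' character; the leading sign character selects the
 * BCD sign code, honouring the PS (preferred sign) bit for '+'.
 */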
2520 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2521 {
2522 int i;
2523 int cr = 0;
2524 uint16_t national = 0;
2525 uint16_t sgnb = get_national_digit(b, 0);
2526 ppc_avr_t ret = { .u64 = { 0, 0 } };
2527 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2528
2529 for (i = 1; i < 8; i++) {
2530 national = get_national_digit(b, i);
2531 if (unlikely(national < 0x30 || national > 0x39)) {
2532 invalid = 1;
2533 break;
2534 }
2535
2536 bcd_put_digit(&ret, national & 0xf, i);
2537 }
2538
2539 if (sgnb == NATIONAL_PLUS) {
2540 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2541 } else {
2542 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2543 }
2544
2545 cr = bcd_cmp_zero(&ret);
2546
2547 if (unlikely(invalid)) {
2548 cr = CRF_SO;
2549 }
2550
2551 *r = ret;
2552
2553 return cr;
2554 }
2555
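/*
 * bcdctn.: convert to national format.  Only the low seven digits fit in
 * the result; any non-zero digit above them sets SO in the returned CR.
 */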
2556 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2557 {
2558 int i;
2559 int cr = 0;
2560 int sgnb = bcd_get_sgn(b);
2561 int invalid = (sgnb == 0);
2562 ppc_avr_t ret = { .u64 = { 0, 0 } };
2563
2564 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2565
2566 for (i = 1; i < 8; i++) {
2567 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2568
2569 if (unlikely(invalid)) {
2570 break;
2571 }
2572 }
2573 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2574
2575 cr = bcd_cmp_zero(b);
2576
2577 if (ox_flag) {
2578 cr |= CRF_SO;
2579 }
2580
2581 if (unlikely(invalid)) {
2582 cr = CRF_SO;
2583 }
2584
2585 *r = ret;
2586
2587 return cr;
2588 }
2589
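/*
 * bcdcfz.: convert from zoned format.  Each source byte holds a zone nibble
 * (0x3, or 0xF when PS is set) and a digit nibble; the zone nibble of the
 * least-significant byte carries the sign instead.
 */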
2590 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2591 {
2592 int i;
2593 int cr = 0;
2594 int invalid = 0;
2595 int zone_digit = 0;
2596 int zone_lead = ps ? 0xF : 0x3;
2597 int digit = 0;
2598 ppc_avr_t ret = { .u64 = { 0, 0 } };
2599 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2600
2601 if (unlikely((sgnb < 0xA) && ps)) {
2602 invalid = 1;
2603 }
2604
2605 for (i = 0; i < 16; i++) {
2606 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2607 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2608 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2609 invalid = 1;
2610 break;
2611 }
2612
2613 bcd_put_digit(&ret, digit, i + 1);
2614 }
2615
2616 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2617 (!ps && (sgnb & 0x4))) {
2618 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2619 } else {
2620 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2621 }
2622
2623 cr = bcd_cmp_zero(&ret);
2624
2625 if (unlikely(invalid)) {
2626 cr = CRF_SO;
2627 }
2628
2629 *r = ret;
2630
2631 return cr;
2632 }
2633
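/*
 * bcdctz.: convert to zoned format.  Only the low 16 digits fit in the
 * zoned result; any non-zero digit above them sets SO.
 */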
2634 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2635 {
2636 int i;
2637 int cr = 0;
2638 uint8_t digit = 0;
2639 int sgnb = bcd_get_sgn(b);
2640 int zone_lead = (ps) ? 0xF0 : 0x30;
2641 int invalid = (sgnb == 0);
2642 ppc_avr_t ret = { .u64 = { 0, 0 } };
2643
2644 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2645
2646 for (i = 0; i < 16; i++) {
2647 digit = bcd_get_digit(b, i + 1, &invalid);
2648
2649 if (unlikely(invalid)) {
2650 break;
2651 }
2652
2653 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2654 }
2655
2656 if (ps) {
2657 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2658 } else {
2659 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2660 }
2661
2662 cr = bcd_cmp_zero(b);
2663
2664 if (ox_flag) {
2665 cr |= CRF_SO;
2666 }
2667
2668 if (unlikely(invalid)) {
2669 cr = CRF_SO;
2670 }
2671
2672 *r = ret;
2673
2674 return cr;
2675 }
2676
2677 /**
2678 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2679 *
2680 * Returns:
2681 * > 0 if ahi|alo > bhi|blo,
2682 * 0 if ahi|alo == bhi|blo,
2683 * < 0 if ahi|alo < bhi|blo
2684 */
2685 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2686 uint64_t blo, uint64_t bhi)
2687 {
2688 return (ahi == bhi) ?
2689 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2690 (ahi > bhi ? 1 : -1);
2691 }
2692
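/*
 * bcdcfsq.: convert a signed 128-bit binary integer to BCD.  The magnitude
 * is split by dividing by 10^15 so that the remainder yields the low 15
 * digits and the quotient the upper ones; a magnitude above 10^31 - 1
 * leaves r unchanged and sets SO.
 */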
2693 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2694 {
2695 int i;
2696 int cr;
2697 uint64_t lo_value;
2698 uint64_t hi_value;
2699 uint64_t rem;
2700 ppc_avr_t ret = { .u64 = { 0, 0 } };
2701
2702 if (b->VsrSD(0) < 0) {
2703 lo_value = -b->VsrSD(1);
2704 hi_value = ~b->VsrD(0) + !lo_value;
2705 bcd_put_digit(&ret, 0xD, 0);
2706
2707 cr = CRF_LT;
2708 } else {
2709 lo_value = b->VsrD(1);
2710 hi_value = b->VsrD(0);
2711 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2712
2713 if (hi_value == 0 && lo_value == 0) {
2714 cr = CRF_EQ;
2715 } else {
2716 cr = CRF_GT;
2717 }
2718 }
2719
2720 /*
2721 * Check src limits: abs(src) <= 10^31 - 1
2722 *
2723 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2724 */
2725 if (ucmp128(lo_value, hi_value,
2726 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2727 cr |= CRF_SO;
2728
2729 /*
2730 * According to the ISA, if src wouldn't fit in the destination
2731 * register, the result is undefined.
2732 * In that case, we leave r unchanged.
2733 */
2734 } else {
2735 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2736
2737 for (i = 1; i < 16; rem /= 10, i++) {
2738 bcd_put_digit(&ret, rem % 10, i);
2739 }
2740
2741 for (; i < 32; lo_value /= 10, i++) {
2742 bcd_put_digit(&ret, lo_value % 10, i);
2743 }
2744
2745 *r = ret;
2746 }
2747
2748 return cr;
2749 }
2750
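/*
 * bcdctsq.: convert BCD to a signed 128-bit binary integer, accumulating
 * the digits most-significant first (value = value * 10 + digit) across
 * both 64-bit halves and negating the result for a minus sign.
 */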
2751 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2752 {
2753 uint8_t i;
2754 int cr;
2755 uint64_t carry;
2756 uint64_t unused;
2757 uint64_t lo_value;
2758 uint64_t hi_value = 0;
2759 int sgnb = bcd_get_sgn(b);
2760 int invalid = (sgnb == 0);
2761
2762 lo_value = bcd_get_digit(b, 31, &invalid);
2763 for (i = 30; i > 0; i--) {
2764 mulu64(&lo_value, &carry, lo_value, 10ULL);
2765 mulu64(&hi_value, &unused, hi_value, 10ULL);
2766 lo_value += bcd_get_digit(b, i, &invalid);
2767 hi_value += carry;
2768
2769 if (unlikely(invalid)) {
2770 break;
2771 }
2772 }
2773
2774 if (sgnb == -1) {
2775 r->VsrSD(1) = -lo_value;
2776 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2777 } else {
2778 r->VsrSD(1) = lo_value;
2779 r->VsrSD(0) = hi_value;
2780 }
2781
2782 cr = bcd_cmp_zero(b);
2783
2784 if (unlikely(invalid)) {
2785 cr = CRF_SO;
2786 }
2787
2788 return cr;
2789 }
2790
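/*
 * bcdcpsgn. copies the sign code of b onto the digits of a; bcdsetsgn.
 * rewrites the sign of b with its preferred encoding.  Both return SO for
 * invalid operands.
 */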
2791 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2792 {
2793 int i;
2794 int invalid = 0;
2795
2796 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2797 return CRF_SO;
2798 }
2799
2800 *r = *a;
2801 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2802
2803 for (i = 1; i < 32; i++) {
2804 bcd_get_digit(a, i, &invalid);
2805 bcd_get_digit(b, i, &invalid);
2806 if (unlikely(invalid)) {
2807 return CRF_SO;
2808 }
2809 }
2810
2811 return bcd_cmp_zero(r);
2812 }
2813
2814 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2815 {
2816 int sgnb = bcd_get_sgn(b);
2817
2818 *r = *b;
2819 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2820
2821 if (bcd_is_valid(b) == false) {
2822 return CRF_SO;
2823 }
2824
2825 return bcd_cmp_zero(r);
2826 }
2827
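/*
 * BCD shift, round and truncate helpers.  The shift count (bcds., bcdus.,
 * bcdsr.) or the number of digits to keep (bcdtrunc., bcdutrunc.) comes
 * from register a; non-zero digits lost off the top are reported via SO.
 */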
2828 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2829 {
2830 int cr;
2831 int i = a->VsrSB(7);
2832 bool ox_flag = false;
2833 int sgnb = bcd_get_sgn(b);
2834 ppc_avr_t ret = *b;
2835 ret.VsrD(1) &= ~0xf;
2836
2837 if (bcd_is_valid(b) == false) {
2838 return CRF_SO;
2839 }
2840
2841 if (unlikely(i > 31)) {
2842 i = 31;
2843 } else if (unlikely(i < -31)) {
2844 i = -31;
2845 }
2846
2847 if (i > 0) {
2848 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2849 } else {
2850 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2851 }
2852 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2853
2854 *r = ret;
2855
2856 cr = bcd_cmp_zero(r);
2857 if (ox_flag) {
2858 cr |= CRF_SO;
2859 }
2860
2861 return cr;
2862 }
2863
2864 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2865 {
2866 int cr;
2867 int i;
2868 int invalid = 0;
2869 bool ox_flag = false;
2870 ppc_avr_t ret = *b;
2871
2872 for (i = 0; i < 32; i++) {
2873 bcd_get_digit(b, i, &invalid);
2874
2875 if (unlikely(invalid)) {
2876 return CRF_SO;
2877 }
2878 }
2879
2880 i = a->VsrSB(7);
2881 if (i >= 32) {
2882 ox_flag = true;
2883 ret.VsrD(1) = ret.VsrD(0) = 0;
2884 } else if (i <= -32) {
2885 ret.VsrD(1) = ret.VsrD(0) = 0;
2886 } else if (i > 0) {
2887 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2888 } else {
2889 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2890 }
2891 *r = ret;
2892
2893 cr = bcd_cmp_zero(r);
2894 if (ox_flag) {
2895 cr |= CRF_SO;
2896 }
2897
2898 return cr;
2899 }
2900
2901 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2902 {
2903 int cr;
2904 int unused = 0;
2905 int invalid = 0;
2906 bool ox_flag = false;
2907 int sgnb = bcd_get_sgn(b);
2908 ppc_avr_t ret = *b;
2909 ret.VsrD(1) &= ~0xf;
2910
2911 int i = a->VsrSB(7);
2912 ppc_avr_t bcd_one;
2913
2914 bcd_one.VsrD(0) = 0;
2915 bcd_one.VsrD(1) = 0x10;
2916
2917 if (bcd_is_valid(b) == false) {
2918 return CRF_SO;
2919 }
2920
2921 if (unlikely(i > 31)) {
2922 i = 31;
2923 } else if (unlikely(i < -31)) {
2924 i = -31;
2925 }
2926
2927 if (i > 0) {
2928 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2929 } else {
2930 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2931
2932 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2933 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2934 }
2935 }
2936 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2937
2938 cr = bcd_cmp_zero(&ret);
2939 if (ox_flag) {
2940 cr |= CRF_SO;
2941 }
2942 *r = ret;
2943
2944 return cr;
2945 }
2946
2947 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2948 {
2949 uint64_t mask;
2950 uint32_t ox_flag = 0;
2951 int i = a->VsrSH(3) + 1;
2952 ppc_avr_t ret = *b;
2953
2954 if (bcd_is_valid(b) == false) {
2955 return CRF_SO;
2956 }
2957
2958 if (i > 16 && i < 32) {
2959 mask = (uint64_t)-1 >> (128 - i * 4);
2960 if (ret.VsrD(0) & ~mask) {
2961 ox_flag = CRF_SO;
2962 }
2963
2964 ret.VsrD(0) &= mask;
2965 } else if (i >= 0 && i <= 16) {
2966 mask = (uint64_t)-1 >> (64 - i * 4);
2967 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2968 ox_flag = CRF_SO;
2969 }
2970
2971 ret.VsrD(1) &= mask;
2972 ret.VsrD(0) = 0;
2973 }
2974 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2975 *r = ret;
2976
2977 return bcd_cmp_zero(&ret) | ox_flag;
2978 }
2979
2980 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2981 {
2982 int i;
2983 uint64_t mask;
2984 uint32_t ox_flag = 0;
2985 int invalid = 0;
2986 ppc_avr_t ret = *b;
2987
2988 for (i = 0; i < 32; i++) {
2989 bcd_get_digit(b, i, &invalid);
2990
2991 if (unlikely(invalid)) {
2992 return CRF_SO;
2993 }
2994 }
2995
2996 i = a->VsrSH(3);
2997 if (i > 16 && i < 33) {
2998 mask = (uint64_t)-1 >> (128 - i * 4);
2999 if (ret.VsrD(0) & ~mask) {
3000 ox_flag = CRF_SO;
3001 }
3002
3003 ret.VsrD(0) &= mask;
3004 } else if (i > 0 && i <= 16) {
3005 mask = (uint64_t)-1 >> (64 - i * 4);
3006 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3007 ox_flag = CRF_SO;
3008 }
3009
3010 ret.VsrD(1) &= mask;
3011 ret.VsrD(0) = 0;
3012 } else if (i == 0) {
3013 if (ret.VsrD(0) || ret.VsrD(1)) {
3014 ox_flag = CRF_SO;
3015 }
3016 ret.VsrD(0) = ret.VsrD(1) = 0;
3017 }
3018
3019 *r = ret;
3020 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
3021 return ox_flag | CRF_EQ;
3022 }
3023
3024 return ox_flag | CRF_GT;
3025 }
3026
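/*
 * AES acceleration helpers (vsbox, vcipher, vncipher and the *last
 * variants), implemented with QEMU's shared AES S-box and round tables
 * (AES_sbox, AES_Te0..3, AES_isbox, AES_imc).
 */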
3027 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3028 {
3029 int i;
3030 VECTOR_FOR_INORDER_I(i, u8) {
3031 r->u8[i] = AES_sbox[a->u8[i]];
3032 }
3033 }
3034
3035 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3036 {
3037 ppc_avr_t result;
3038 int i;
3039
3040 VECTOR_FOR_INORDER_I(i, u32) {
3041 result.VsrW(i) = b->VsrW(i) ^
3042 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
3043 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
3044 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
3045 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
3046 }
3047 *r = result;
3048 }
3049
3050 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3051 {
3052 ppc_avr_t result;
3053 int i;
3054
3055 VECTOR_FOR_INORDER_I(i, u8) {
3056 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
3057 }
3058 *r = result;
3059 }
3060
3061 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3062 {
3063 /* This differs from what is written in ISA V2.07: the RTL there is
3064 * incorrect and will be fixed in V2.07B. */
3065 int i;
3066 ppc_avr_t tmp;
3067
3068 VECTOR_FOR_INORDER_I(i, u8) {
3069 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
3070 }
3071
3072 VECTOR_FOR_INORDER_I(i, u32) {
3073 r->VsrW(i) =
3074 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
3075 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
3076 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
3077 AES_imc[tmp.VsrB(4 * i + 3)][3];
3078 }
3079 }
3080
3081 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3082 {
3083 ppc_avr_t result;
3084 int i;
3085
3086 VECTOR_FOR_INORDER_I(i, u8) {
3087 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
3088 }
3089 *r = result;
3090 }
3091
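/*
 * SHA-2 sigma helpers: vshasigmaw computes the SHA-256 sigma (st == 0) or
 * Sigma (st == 1) functions on each word, with the matching 'six' bit
 * selecting sigma0/Sigma0 versus sigma1/Sigma1; vshasigmad does the same
 * for SHA-512 on doublewords.
 */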
3092 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3093 {
3094 int st = (st_six & 0x10) != 0;
3095 int six = st_six & 0xF;
3096 int i;
3097
3098 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3099 if (st == 0) {
3100 if ((six & (0x8 >> i)) == 0) {
3101 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3102 ror32(a->VsrW(i), 18) ^
3103 (a->VsrW(i) >> 3);
3104 } else { /* six.bit[i] == 1 */
3105 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3106 ror32(a->VsrW(i), 19) ^
3107 (a->VsrW(i) >> 10);
3108 }
3109 } else { /* st == 1 */
3110 if ((six & (0x8 >> i)) == 0) {
3111 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3112 ror32(a->VsrW(i), 13) ^
3113 ror32(a->VsrW(i), 22);
3114 } else { /* six.bit[i] == 1 */
3115 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3116 ror32(a->VsrW(i), 11) ^
3117 ror32(a->VsrW(i), 25);
3118 }
3119 }
3120 }
3121 }
3122
3123 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3124 {
3125 int st = (st_six & 0x10) != 0;
3126 int six = st_six & 0xF;
3127 int i;
3128
3129 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3130 if (st == 0) {
3131 if ((six & (0x8 >> (2 * i))) == 0) {
3132 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3133 ror64(a->VsrD(i), 8) ^
3134 (a->VsrD(i) >> 7);
3135 } else { /* six.bit[2*i] == 1 */
3136 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3137 ror64(a->VsrD(i), 61) ^
3138 (a->VsrD(i) >> 6);
3139 }
3140 } else { /* st == 1 */
3141 if ((six & (0x8 >> (2 * i))) == 0) {
3142 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3143 ror64(a->VsrD(i), 34) ^
3144 ror64(a->VsrD(i), 39);
3145 } else { /* six.bit[2*i] == 1 */
3146 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3147 ror64(a->VsrD(i), 18) ^
3148 ror64(a->VsrD(i), 41);
3149 }
3150 }
3151 }
3152 }
3153
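/*
 * vpermxor: for each byte of the result, the two nibbles of the
 * corresponding byte of c index one byte of a and one byte of b, which are
 * XORed together.
 */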
3154 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3155 {
3156 ppc_avr_t result;
3157 int i;
3158
3159 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3160 int indexA = c->VsrB(i) >> 4;
3161 int indexB = c->VsrB(i) & 0xF;
3162
3163 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3164 }
3165 *r = result;
3166 }
3167
3168 #undef VECTOR_FOR_INORDER_I
3169
3170 /*****************************************************************************/
3171 /* SPE extension helpers */
3172 /* Nibble bit-reversal lookup table, used to make byte/word bit reversal quicker */
3173 static const uint8_t hbrev[16] = {
3174 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3175 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3176 };
3177
3178 static inline uint8_t byte_reverse(uint8_t val)
3179 {
3180 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3181 }
3182
3183 static inline uint32_t word_reverse(uint32_t val)
3184 {
3185 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3186 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3187 }
3188
3189 #define MASKBITS 16 /* Arbitrary value; the real width is implementation dependent and still to be fixed */
3190 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3191 {
3192 uint32_t a, b, d, mask;
3193
3194 mask = UINT32_MAX >> (32 - MASKBITS);
3195 a = arg1 & mask;
3196 b = arg2 & mask;
3197 d = word_reverse(1 + word_reverse(a | ~b));
3198 return (arg1 & ~mask) | (d & b);
3199 }
3200
3201 uint32_t helper_cntlsw32(uint32_t val)
3202 {
3203 if (val & 0x80000000) {
3204 return clz32(~val);
3205 } else {
3206 return clz32(val);
3207 }
3208 }
3209
3210 uint32_t helper_cntlzw32(uint32_t val)
3211 {
3212 return clz32(val);
3213 }
3214
3215 /* 440 specific: dlmzb - Determine Leftmost Zero Byte */
3216 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3217 target_ulong low, uint32_t update_Rc)
3218 {
3219 target_ulong mask;
3220 int i;
3221
3222 i = 1;
3223 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3224 if ((high & mask) == 0) {
3225 if (update_Rc) {
3226 env->crf[0] = 0x4;
3227 }
3228 goto done;
3229 }
3230 i++;
3231 }
3232 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3233 if ((low & mask) == 0) {
3234 if (update_Rc) {
3235 env->crf[0] = 0x8;
3236 }
3237 goto done;
3238 }
3239 i++;
3240 }
3241 i = 8;
3242 if (update_Rc) {
3243 env->crf[0] = 0x2;
3244 }
3245 done:
3246 env->xer = (env->xer & ~0x7F) | i;
3247 if (update_Rc) {
3248 env->crf[0] |= xer_so;
3249 }
3250 return i;
3251 }