1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
35
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 {
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
42 }
43 }
44
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
47 {
48 uint64_t rt = 0;
49 int overflow = 0;
50
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
53
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
59 }
60
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
63 }
64
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
67 }
68
69 return (target_ulong)rt;
70 }
71
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
74 {
75 int64_t rt = 0;
76 int overflow = 0;
77
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
80
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
87 }
88
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
91 }
92
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
95 }
96
97 return (target_ulong)rt;
98 }
99
100 #if defined(TARGET_PPC64)
101
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 {
104 uint64_t rt = 0;
105 int overflow = 0;
106
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
112 }
113
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
116 }
117
118 return rt;
119 }
120
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 {
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
127
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
133 }
134
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
137 }
138
139 return rt;
140 }
141
142 #endif
143
144
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
148
149 /*
150  * Subtract 1 from each byte, AND with the inverse of the original value, and
151  * check whether the MSB is set in each byte.
152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
154 */
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
156
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
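/*
 * Worked example: with ra = 0x42, pattern(0x42) = 0x4242424242424242; any
 * byte of rb equal to 0x42 XORs to 0x00, which haszero() then detects.
 */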
159
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
161 {
162 return hasvalue(rb, ra) ? CRF_GT : 0;
163 }
164
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
168
169 /*
170 * Return a random number.
171 */
172 uint64_t helper_darn32(void)
173 {
174 Error *err = NULL;
175 uint32_t ret;
176
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
182 }
183
184 return ret;
185 }
186
187 uint64_t helper_darn64(void)
188 {
189 Error *err = NULL;
190 uint64_t ret;
191
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
197 }
198
199 return ret;
200 }
201
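/*
 * Bit Permute Doubleword: each of the eight bytes of 'rs' selects one bit of
 * 'rb' (big-endian bit numbering, as PPC_BIT); the selected bits are gathered
 * into the low byte of the result, and byte values >= 64 contribute 0.
 */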
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
203 {
204 int i;
205 uint64_t ra = 0;
206
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
212 }
213 }
214 }
215 return ra;
216 }
217
218 #endif
219
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
221 {
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
225
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
229 }
230 mask <<= 8;
231 }
232 return ra;
233 }
234
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
238 {
239 int32_t ret;
240
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
249 }
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
253 }
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
257 }
258 return (target_long)ret;
259 }
260
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
264 {
265 int64_t ret;
266
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
275 }
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
279 }
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
283 }
284 return ret;
285 }
286 #endif
287
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
299 }
300
301 target_ulong helper_popcntw(target_ulong val)
302 {
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
315 }
316 #else
317 target_ulong helper_popcntb(target_ulong val)
318 {
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
324 }
325 #endif
326
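/*
 * Centrifuge Doubleword: the bits of 'src' selected by the 1-bits of 'mask'
 * are gathered, in order, into the low-order end of the result, and the bits
 * selected by the 0-bits fill the high-order end.
 * e.g. CFUGED(0x000000000000ABCD, 0x000000000000FF00) = 0x000000000000CDAB.
 */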
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
328 {
329 /*
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
333 * ctz or cto, we negate the mask at the end of the loop.
334 */
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
338
339 if (mask == 0 || mask == -1) {
340 return src;
341 }
342
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
349 }
350
351 /*
352  * Extract 'n' trailing bits of 'src' and put them in the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
355 */
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
361 }
362
363 /*
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
368 */
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
374 }
375
376 /*
377  * At the end, 'right' has been ror'ed a total of ctpop(mask) times. To put it
378  * back in place, we shift it right by the remaining 64 - ctpop(mask) bits.
379 */
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
384 }
385
386 return left | (right >> n);
387 }
388
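/*
 * Parallel Bits Deposit (cf. x86 PDEP): the low-order bits of 'src' are
 * scattered, in order, to the positions of the 1-bits of 'mask'.
 * e.g. PDEPD(0x5, 0xF0) = 0x50.
 */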
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
390 {
391 int i, o;
392 uint64_t result = 0;
393
394 if (mask == -1) {
395 return src;
396 }
397
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
402 }
403
404 return result;
405 }
406
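/*
 * Parallel Bits Extract (cf. x86 PEXT): the bits of 'src' at the positions of
 * the 1-bits of 'mask' are gathered, in order, into the low-order result bits.
 * e.g. PEXTD(0x50, 0xF0) = 0x5.
 */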
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
408 {
409 int i, o;
410 uint64_t result = 0;
411
412 if (mask == -1) {
413 return src;
414 }
415
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
420 }
421
422 return result;
423 }
424
425 /*****************************************************************************/
426 /* Altivec extension helpers */
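/*
 * VECTOR_FOR_INORDER_I iterates over the elements of a vector register in
 * PowerPC (big-endian) element order, regardless of host endianness.
 */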
427 #if defined(HOST_WORDS_BIGENDIAN)
428 #define VECTOR_FOR_INORDER_I(index, element) \
429 for (index = 0; index < ARRAY_SIZE(r->element); index++)
430 #else
431 #define VECTOR_FOR_INORDER_I(index, element) \
432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
433 #endif
434
435 /* Saturating arithmetic helpers. */
436 #define SATCVT(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
438 { \
439 to_type r; \
440 \
441 if (x < (from_type)min) { \
442 r = min; \
443 *sat = 1; \
444 } else if (x > (from_type)max) { \
445 r = max; \
446 *sat = 1; \
447 } else { \
448 r = x; \
449 } \
450 return r; \
451 }
452 #define SATCVTU(from, to, from_type, to_type, min, max) \
453 static inline to_type cvt##from##to(from_type x, int *sat) \
454 { \
455 to_type r; \
456 \
457 if (x > (from_type)max) { \
458 r = max; \
459 *sat = 1; \
460 } else { \
461 r = x; \
462 } \
463 return r; \
464 }
465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
468
469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
475 #undef SATCVT
476 #undef SATCVTU
477
478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
479 {
480 ppc_store_vscr(env, vscr);
481 }
482
483 uint32_t helper_mfvscr(CPUPPCState *env)
484 {
485 return ppc_get_vscr(env);
486 }
487
488 static inline void set_vscr_sat(CPUPPCState *env)
489 {
490 /* The choice of non-zero value is arbitrary. */
491 env->vscr_sat.u32[0] = 1;
492 }
493
494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
495 {
496 int i;
497
498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
499 r->u32[i] = ~a->u32[i] < b->u32[i];
500 }
501 }
502
503 /* vprtybw */
504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
505 {
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
509 res ^= res >> 8;
510 r->u32[i] = res & 1;
511 }
512 }
513
514 /* vprtybd */
515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
516 {
517 int i;
518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->u64[i] = res & 1;
523 }
524 }
525
526 /* vprtybq */
527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
528 {
529 uint64_t res = b->u64[0] ^ b->u64[1];
530 res ^= res >> 32;
531 res ^= res >> 16;
532 res ^= res >> 8;
533 r->VsrD(1) = res & 1;
534 r->VsrD(0) = 0;
535 }
536
537 #define VARITHFP(suffix, func) \
538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
540 { \
541 int i; \
542 \
543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
545 } \
546 }
547 VARITHFP(addfp, float32_add)
548 VARITHFP(subfp, float32_sub)
549 VARITHFP(minfp, float32_min)
550 VARITHFP(maxfp, float32_max)
551 #undef VARITHFP
552
553 #define VARITHFPFMA(suffix, type) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b, ppc_avr_t *c) \
556 { \
557 int i; \
558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
560 type, &env->vec_status); \
561 } \
562 }
563 VARITHFPFMA(maddfp, 0);
564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
565 #undef VARITHFPFMA
566
567 #define VARITHSAT_CASE(type, op, cvt, element) \
568 { \
569 type result = (type)a->element[i] op (type)b->element[i]; \
570 r->element[i] = cvt(result, &sat); \
571 }
572
573 #define VARITHSAT_DO(name, op, optype, cvt, element) \
574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
576 { \
577 int sat = 0; \
578 int i; \
579 \
580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
581 VARITHSAT_CASE(optype, op, cvt, element); \
582 } \
583 if (sat) { \
584 vscr_sat->u32[0] = 1; \
585 } \
586 }
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
600 #undef VARITHSAT_DO
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
603
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
606 { \
607 int i; \
608 \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
612 } \
613 }
614
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
616 unsigned_type) \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
622 #undef VAVG_DO
623 #undef VAVG
624
625 #define VABSDU_DO(name, element) \
626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
627 { \
628 int i; \
629 \
630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
631 r->element[i] = (a->element[i] > b->element[i]) ? \
632 (a->element[i] - b->element[i]) : \
633 (b->element[i] - a->element[i]); \
634 } \
635 }
636
637 /*
638 * VABSDU - Vector absolute difference unsigned
639 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
640 * element - element type to access from vector
641 */
642 #define VABSDU(type, element) \
643 VABSDU_DO(absdu##type, element)
644 VABSDU(b, u8)
645 VABSDU(h, u16)
646 VABSDU(w, u32)
647 #undef VABSDU_DO
648 #undef VABSDU
649
650 #define VCF(suffix, cvt, element) \
651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
652 ppc_avr_t *b, uint32_t uim) \
653 { \
654 int i; \
655 \
656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
657 float32 t = cvt(b->element[i], &env->vec_status); \
658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
659 } \
660 }
661 VCF(ux, uint32_to_float32, u32)
662 VCF(sx, int32_to_float32, s32)
663 #undef VCF
664
665 #define VCMPNEZ(NAME, ELEM) \
666 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
667 { \
668 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
669 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
670 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
671 } \
672 }
673 VCMPNEZ(VCMPNEZB, u8)
674 VCMPNEZ(VCMPNEZH, u16)
675 VCMPNEZ(VCMPNEZW, u32)
676 #undef VCMPNEZ
677
678 #define VCMPFP_DO(suffix, compare, order, record) \
679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
680 ppc_avr_t *a, ppc_avr_t *b) \
681 { \
682 uint32_t ones = (uint32_t)-1; \
683 uint32_t all = ones; \
684 uint32_t none = 0; \
685 int i; \
686 \
687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
688 uint32_t result; \
689 FloatRelation rel = \
690 float32_compare_quiet(a->f32[i], b->f32[i], \
691 &env->vec_status); \
692 if (rel == float_relation_unordered) { \
693 result = 0; \
694 } else if (rel compare order) { \
695 result = ones; \
696 } else { \
697 result = 0; \
698 } \
699 r->u32[i] = result; \
700 all &= result; \
701 none |= result; \
702 } \
703 if (record) { \
704 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
705 } \
706 }
707 #define VCMPFP(suffix, compare, order) \
708 VCMPFP_DO(suffix, compare, order, 0) \
709 VCMPFP_DO(suffix##_dot, compare, order, 1)
710 VCMPFP(eqfp, ==, float_relation_equal)
711 VCMPFP(gefp, !=, float_relation_less)
712 VCMPFP(gtfp, ==, float_relation_greater)
713 #undef VCMPFP_DO
714 #undef VCMPFP
715
716 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
717 ppc_avr_t *a, ppc_avr_t *b, int record)
718 {
719 int i;
720 int all_in = 0;
721
722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
723 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
724 &env->vec_status);
725 if (le_rel == float_relation_unordered) {
726 r->u32[i] = 0xc0000000;
727 all_in = 1;
728 } else {
729 float32 bneg = float32_chs(b->f32[i]);
730 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
731 &env->vec_status);
732 int le = le_rel != float_relation_greater;
733 int ge = ge_rel != float_relation_less;
734
735 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
736 all_in |= (!le | !ge);
737 }
738 }
739 if (record) {
740 env->crf[6] = (all_in == 0) << 1;
741 }
742 }
743
744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
745 {
746 vcmpbfp_internal(env, r, a, b, 0);
747 }
748
749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
750 ppc_avr_t *b)
751 {
752 vcmpbfp_internal(env, r, a, b, 1);
753 }
754
755 #define VCT(suffix, satcvt, element) \
756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
757 ppc_avr_t *b, uint32_t uim) \
758 { \
759 int i; \
760 int sat = 0; \
761 float_status s = env->vec_status; \
762 \
763 set_float_rounding_mode(float_round_to_zero, &s); \
764 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
765 if (float32_is_any_nan(b->f32[i])) { \
766 r->element[i] = 0; \
767 } else { \
768 float64 t = float32_to_float64(b->f32[i], &s); \
769 int64_t j; \
770 \
771 t = float64_scalbn(t, uim, &s); \
772 j = float64_to_int64(t, &s); \
773 r->element[i] = satcvt(j, &sat); \
774 } \
775 } \
776 if (sat) { \
777 set_vscr_sat(env); \
778 } \
779 }
780 VCT(uxs, cvtsduw, u32)
781 VCT(sxs, cvtsdsw, s32)
782 #undef VCT
783
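/*
 * vclzlsbb/vctzlsbb: count the leading (resp. trailing) bytes of the vector,
 * in PowerPC element order, whose least-significant bit is zero, stopping at
 * the first byte with its least-significant bit set.
 */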
784 target_ulong helper_vclzlsbb(ppc_avr_t *r)
785 {
786 target_ulong count = 0;
787 int i;
788 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
789 if (r->VsrB(i) & 0x01) {
790 break;
791 }
792 count++;
793 }
794 return count;
795 }
796
797 target_ulong helper_vctzlsbb(ppc_avr_t *r)
798 {
799 target_ulong count = 0;
800 int i;
801 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
802 if (r->VsrB(i) & 0x01) {
803 break;
804 }
805 count++;
806 }
807 return count;
808 }
809
810 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
811 ppc_avr_t *b, ppc_avr_t *c)
812 {
813 int sat = 0;
814 int i;
815
816 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
817 int32_t prod = a->s16[i] * b->s16[i];
818 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
819
820 r->s16[i] = cvtswsh(t, &sat);
821 }
822
823 if (sat) {
824 set_vscr_sat(env);
825 }
826 }
827
828 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
829 ppc_avr_t *b, ppc_avr_t *c)
830 {
831 int sat = 0;
832 int i;
833
834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
835 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
836 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
837 r->s16[i] = cvtswsh(t, &sat);
838 }
839
840 if (sat) {
841 set_vscr_sat(env);
842 }
843 }
844
845 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
846 {
847 int i;
848
849 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
850 int32_t prod = a->s16[i] * b->s16[i];
851 r->s16[i] = (int16_t) (prod + c->s16[i]);
852 }
853 }
854
855 #define VMRG_DO(name, element, access, ofs) \
856 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
857 { \
858 ppc_avr_t result; \
859 int i, half = ARRAY_SIZE(r->element) / 2; \
860 \
861 for (i = 0; i < half; i++) { \
862 result.access(i * 2 + 0) = a->access(i + ofs); \
863 result.access(i * 2 + 1) = b->access(i + ofs); \
864 } \
865 *r = result; \
866 }
867
868 #define VMRG(suffix, element, access) \
869 VMRG_DO(mrgl##suffix, element, access, half) \
870 VMRG_DO(mrgh##suffix, element, access, 0)
871 VMRG(b, u8, VsrB)
872 VMRG(h, u16, VsrH)
873 VMRG(w, u32, VsrW)
874 #undef VMRG_DO
875 #undef VMRG
876
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
879 {
880 int32_t prod[16];
881 int i;
882
883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884 prod[i] = (int32_t)a->s8[i] * b->u8[i];
885 }
886
887 VECTOR_FOR_INORDER_I(i, s32) {
888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889 prod[4 * i + 2] + prod[4 * i + 3];
890 }
891 }
892
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
895 {
896 int32_t prod[8];
897 int i;
898
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 prod[i] = a->s16[i] * b->s16[i];
901 }
902
903 VECTOR_FOR_INORDER_I(i, s32) {
904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
905 }
906 }
907
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909 ppc_avr_t *b, ppc_avr_t *c)
910 {
911 int32_t prod[8];
912 int i;
913 int sat = 0;
914
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 prod[i] = (int32_t)a->s16[i] * b->s16[i];
917 }
918
919 VECTOR_FOR_INORDER_I(i, s32) {
920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
921
922 r->u32[i] = cvtsdsw(t, &sat);
923 }
924
925 if (sat) {
926 set_vscr_sat(env);
927 }
928 }
929
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931 ppc_avr_t *b, ppc_avr_t *c)
932 {
933 uint16_t prod[16];
934 int i;
935
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 prod[i] = a->u8[i] * b->u8[i];
938 }
939
940 VECTOR_FOR_INORDER_I(i, u32) {
941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942 prod[4 * i + 2] + prod[4 * i + 3];
943 }
944 }
945
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947 ppc_avr_t *b, ppc_avr_t *c)
948 {
949 uint32_t prod[8];
950 int i;
951
952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953 prod[i] = a->u16[i] * b->u16[i];
954 }
955
956 VECTOR_FOR_INORDER_I(i, u32) {
957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
958 }
959 }
960
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
963 {
964 uint32_t prod[8];
965 int i;
966 int sat = 0;
967
968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969 prod[i] = a->u16[i] * b->u16[i];
970 }
971
972 VECTOR_FOR_INORDER_I(i, s32) {
973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
974
975 r->u32[i] = cvtuduw(t, &sat);
976 }
977
978 if (sat) {
979 set_vscr_sat(env);
980 }
981 }
982
983 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
984 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
985 { \
986 int i; \
987 \
988 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
989 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
990 (cast)b->mul_access(i); \
991 } \
992 }
993
994 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
995 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
996 { \
997 int i; \
998 \
999 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1000 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1001 (cast)b->mul_access(i + 1); \
1002 } \
1003 }
1004
1005 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1006 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1008 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1009 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1010 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1011 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1012 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1013 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1014 #undef VMUL_DO_EVN
1015 #undef VMUL_DO_ODD
1016 #undef VMUL
1017
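/*
 * VSX Permute Extended: for each result byte, if the high three bits of the
 * corresponding permute-control byte in 'pcv' match 'uim', its low five bits
 * index a byte of the concatenation s0 || s1; otherwise the byte is zeroed.
 */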
1018 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1019 target_ulong uim)
1020 {
1021 int i, idx;
1022 ppc_vsr_t tmp = { .u64 = {0, 0} };
1023
1024 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1025 if ((pcv->VsrB(i) >> 5) == uim) {
1026 idx = pcv->VsrB(i) & 0x1f;
1027 if (idx < ARRAY_SIZE(t->u8)) {
1028 tmp.VsrB(i) = s0->VsrB(idx);
1029 } else {
1030 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1031 }
1032 }
1033 }
1034
1035 *t = tmp;
1036 }
1037
1038 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1039 {
1040 ppc_avr_t result;
1041 int i;
1042
1043 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1044 int s = c->VsrB(i) & 0x1f;
1045 int index = s & 0xf;
1046
1047 if (s & 0x10) {
1048 result.VsrB(i) = b->VsrB(index);
1049 } else {
1050 result.VsrB(i) = a->VsrB(index);
1051 }
1052 }
1053 *r = result;
1054 }
1055
1056 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1057 {
1058 ppc_avr_t result;
1059 int i;
1060
1061 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1062 int s = c->VsrB(i) & 0x1f;
1063 int index = 15 - (s & 0xf);
1064
1065 if (s & 0x10) {
1066 result.VsrB(i) = a->VsrB(index);
1067 } else {
1068 result.VsrB(i) = b->VsrB(index);
1069 }
1070 }
1071 *r = result;
1072 }
1073
1074 #if defined(HOST_WORDS_BIGENDIAN)
1075 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1076 #define VBPERMD_INDEX(i) (i)
1077 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1078 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1079 #else
1080 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1081 #define VBPERMD_INDEX(i) (1 - i)
1082 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1083 #define EXTRACT_BIT(avr, i, index) \
1084 (extract64((avr)->u64[1 - i], 63 - index, 1))
1085 #endif
1086
1087 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1088 {
1089 int i, j;
1090 ppc_avr_t result = { .u64 = { 0, 0 } };
1091 VECTOR_FOR_INORDER_I(i, u64) {
1092 for (j = 0; j < 8; j++) {
1093 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1094 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1095 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1096 }
1097 }
1098 }
1099 *r = result;
1100 }
1101
1102 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1103 {
1104 int i;
1105 uint64_t perm = 0;
1106
1107 VECTOR_FOR_INORDER_I(i, u8) {
1108 int index = VBPERMQ_INDEX(b, i);
1109
1110 if (index < 128) {
1111 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1112 if (a->u64[VBPERMQ_DW(index)] & mask) {
1113 perm |= (0x8000 >> i);
1114 }
1115 }
1116 }
1117
1118 r->VsrD(0) = perm;
1119 r->VsrD(1) = 0;
1120 }
1121
1122 #undef VBPERMQ_INDEX
1123 #undef VBPERMQ_DW
1124
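/*
 * Vector Polynomial Multiply-Sum: carry-lessly multiply corresponding source
 * elements of 'a' and 'b' (over GF(2)), then XOR each pair of adjacent
 * products into the wider target element.
 */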
1125 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1126 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1127 { \
1128 int i, j; \
1129 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1130 \
1131 VECTOR_FOR_INORDER_I(i, srcfld) { \
1132 prod[i] = 0; \
1133 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1134 if (a->srcfld[i] & (1ull << j)) { \
1135 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1136 } \
1137 } \
1138 } \
1139 \
1140 VECTOR_FOR_INORDER_I(i, trgfld) { \
1141 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1142 } \
1143 }
1144
1145 PMSUM(vpmsumb, u8, u16, uint16_t)
1146 PMSUM(vpmsumh, u16, u32, uint32_t)
1147 PMSUM(vpmsumw, u32, u64, uint64_t)
1148
1149 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1150 {
1151
1152 #ifdef CONFIG_INT128
1153 int i, j;
1154 __uint128_t prod[2];
1155
1156 VECTOR_FOR_INORDER_I(i, u64) {
1157 prod[i] = 0;
1158 for (j = 0; j < 64; j++) {
1159 if (a->u64[i] & (1ull << j)) {
1160 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1161 }
1162 }
1163 }
1164
1165 r->u128 = prod[0] ^ prod[1];
1166
1167 #else
1168 int i, j;
1169 ppc_avr_t prod[2];
1170
1171 VECTOR_FOR_INORDER_I(i, u64) {
1172 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1173 for (j = 0; j < 64; j++) {
1174 if (a->u64[i] & (1ull << j)) {
1175 ppc_avr_t bshift;
1176 if (j == 0) {
1177 bshift.VsrD(0) = 0;
1178 bshift.VsrD(1) = b->u64[i];
1179 } else {
1180 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1181 bshift.VsrD(1) = b->u64[i] << j;
1182 }
1183 prod[i].VsrD(1) ^= bshift.VsrD(1);
1184 prod[i].VsrD(0) ^= bshift.VsrD(0);
1185 }
1186 }
1187 }
1188
1189 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1190 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1191 #endif
1192 }
1193
1194
1195 #if defined(HOST_WORDS_BIGENDIAN)
1196 #define PKBIG 1
1197 #else
1198 #define PKBIG 0
1199 #endif
1200 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1201 {
1202 int i, j;
1203 ppc_avr_t result;
1204 #if defined(HOST_WORDS_BIGENDIAN)
1205 const ppc_avr_t *x[2] = { a, b };
1206 #else
1207 const ppc_avr_t *x[2] = { b, a };
1208 #endif
1209
1210 VECTOR_FOR_INORDER_I(i, u64) {
1211 VECTOR_FOR_INORDER_I(j, u32) {
1212 uint32_t e = x[i]->u32[j];
1213
1214 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1215 ((e >> 6) & 0x3e0) |
1216 ((e >> 3) & 0x1f));
1217 }
1218 }
1219 *r = result;
1220 }
1221
1222 #define VPK(suffix, from, to, cvt, dosat) \
1223 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1224 ppc_avr_t *a, ppc_avr_t *b) \
1225 { \
1226 int i; \
1227 int sat = 0; \
1228 ppc_avr_t result; \
1229 ppc_avr_t *a0 = PKBIG ? a : b; \
1230 ppc_avr_t *a1 = PKBIG ? b : a; \
1231 \
1232 VECTOR_FOR_INORDER_I(i, from) { \
1233 result.to[i] = cvt(a0->from[i], &sat); \
1234 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1235 } \
1236 *r = result; \
1237 if (dosat && sat) { \
1238 set_vscr_sat(env); \
1239 } \
1240 }
1241 #define I(x, y) (x)
1242 VPK(shss, s16, s8, cvtshsb, 1)
1243 VPK(shus, s16, u8, cvtshub, 1)
1244 VPK(swss, s32, s16, cvtswsh, 1)
1245 VPK(swus, s32, u16, cvtswuh, 1)
1246 VPK(sdss, s64, s32, cvtsdsw, 1)
1247 VPK(sdus, s64, u32, cvtsduw, 1)
1248 VPK(uhus, u16, u8, cvtuhub, 1)
1249 VPK(uwus, u32, u16, cvtuwuh, 1)
1250 VPK(udus, u64, u32, cvtuduw, 1)
1251 VPK(uhum, u16, u8, I, 0)
1252 VPK(uwum, u32, u16, I, 0)
1253 VPK(udum, u64, u32, I, 0)
1254 #undef I
1255 #undef VPK
1256 #undef PKBIG
1257
1258 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1259 {
1260 int i;
1261
1262 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1263 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1264 }
1265 }
1266
1267 #define VRFI(suffix, rounding) \
1268 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1269 ppc_avr_t *b) \
1270 { \
1271 int i; \
1272 float_status s = env->vec_status; \
1273 \
1274 set_float_rounding_mode(rounding, &s); \
1275 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1276 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1277 } \
1278 }
1279 VRFI(n, float_round_nearest_even)
1280 VRFI(m, float_round_down)
1281 VRFI(p, float_round_up)
1282 VRFI(z, float_round_to_zero)
1283 #undef VRFI
1284
1285 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1286 {
1287 int i;
1288
1289 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1290 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1291
1292 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1293 }
1294 }
1295
1296 #define VRLMI(name, size, element, insert) \
1297 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1298 { \
1299 int i; \
1300 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1301 uint##size##_t src1 = a->element[i]; \
1302 uint##size##_t src2 = b->element[i]; \
1303 uint##size##_t src3 = r->element[i]; \
1304 uint##size##_t begin, end, shift, mask, rot_val; \
1305 \
1306 shift = extract##size(src2, 0, 6); \
1307 end = extract##size(src2, 8, 6); \
1308 begin = extract##size(src2, 16, 6); \
1309 rot_val = rol##size(src1, shift); \
1310 mask = mask_u##size(begin, end); \
1311 if (insert) { \
1312 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1313 } else { \
1314 r->element[i] = (rot_val & mask); \
1315 } \
1316 } \
1317 }
1318
1319 VRLMI(VRLDMI, 64, u64, 1);
1320 VRLMI(VRLWMI, 32, u32, 1);
1321 VRLMI(VRLDNM, 64, u64, 0);
1322 VRLMI(VRLWNM, 32, u32, 0);
1323
1324 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1325 {
1326 int i;
1327
1328 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1329 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1330 }
1331 }
1332
1333 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1334 {
1335 int i;
1336
1337 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1338 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1339 }
1340 }
1341
1342 #define VEXTU_X_DO(name, size, left) \
1343 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1344 { \
1345 int index = (a & 0xf) * 8; \
1346 if (left) { \
1347 index = 128 - index - size; \
1348 } \
1349 return int128_getlo(int128_rshift(b->s128, index)) & \
1350 MAKE_64BIT_MASK(0, size); \
1351 }
1352 VEXTU_X_DO(vextublx, 8, 1)
1353 VEXTU_X_DO(vextuhlx, 16, 1)
1354 VEXTU_X_DO(vextuwlx, 32, 1)
1355 VEXTU_X_DO(vextubrx, 8, 0)
1356 VEXTU_X_DO(vextuhrx, 16, 0)
1357 VEXTU_X_DO(vextuwrx, 32, 0)
1358 #undef VEXTU_X_DO
1359
1360 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1361 {
1362 int i;
1363 unsigned int shift, bytes, size;
1364
1365 size = ARRAY_SIZE(r->u8);
1366 for (i = 0; i < size; i++) {
1367 shift = b->VsrB(i) & 0x7; /* extract shift value */
1368 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1369 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1370 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1371 }
1372 }
1373
1374 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1375 {
1376 int i;
1377 unsigned int shift, bytes;
1378
1379 /*
1380  * Process in reverse order, as the destination and source registers can
1381  * be the same. The register is modified in place (saving a temporary), and
1382  * reverse order guarantees that an already-computed byte is not fed back.
1383 */
1384 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1385 shift = b->VsrB(i) & 0x7; /* extract shift value */
1386 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1387 /* extract adjacent bytes */
1388 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1389 }
1390 }
1391
1392 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1393 {
1394 int sh = shift & 0xf;
1395 int i;
1396 ppc_avr_t result;
1397
1398 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1399 int index = sh + i;
1400 if (index > 0xf) {
1401 result.VsrB(i) = b->VsrB(index - 0x10);
1402 } else {
1403 result.VsrB(i) = a->VsrB(index);
1404 }
1405 }
1406 *r = result;
1407 }
1408
1409 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1410 {
1411 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1412
1413 #if defined(HOST_WORDS_BIGENDIAN)
1414 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1415 memset(&r->u8[16 - sh], 0, sh);
1416 #else
1417 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1418 memset(&r->u8[0], 0, sh);
1419 #endif
1420 }
1421
1422 #if defined(HOST_WORDS_BIGENDIAN)
1423 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1424 #else
1425 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1426 #endif
1427
1428 #define VINSX(SUFFIX, TYPE) \
1429 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1430 uint64_t val, target_ulong index) \
1431 { \
1432 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1433 target_long idx = index; \
1434 \
1435 if (idx < 0 || idx > maxidx) { \
1436 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1437 qemu_log_mask(LOG_GUEST_ERROR, \
1438 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1439 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1440 } else { \
1441 TYPE src = val; \
1442 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1443 } \
1444 }
1445 VINSX(B, uint8_t)
1446 VINSX(H, uint16_t)
1447 VINSX(W, uint32_t)
1448 VINSX(D, uint64_t)
1449 #undef ELEM_ADDR
1450 #undef VINSX
1451 #if defined(HOST_WORDS_BIGENDIAN)
1452 #define VEXTDVLX(NAME, SIZE) \
1453 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1454 target_ulong index) \
1455 { \
1456 const target_long idx = index; \
1457 ppc_avr_t tmp[2] = { *a, *b }; \
1458 memset(t, 0, sizeof(*t)); \
1459 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1460 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1461 } else { \
1462 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1463 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1464 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1465 } \
1466 }
1467 #else
1468 #define VEXTDVLX(NAME, SIZE) \
1469 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1470 target_ulong index) \
1471 { \
1472 const target_long idx = index; \
1473 ppc_avr_t tmp[2] = { *b, *a }; \
1474 memset(t, 0, sizeof(*t)); \
1475 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1476 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1477 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1478 } else { \
1479 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1480 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1481 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1482 } \
1483 }
1484 #endif
1485 VEXTDVLX(VEXTDUBVLX, 1)
1486 VEXTDVLX(VEXTDUHVLX, 2)
1487 VEXTDVLX(VEXTDUWVLX, 4)
1488 VEXTDVLX(VEXTDDVLX, 8)
1489 #undef VEXTDVLX
1490 #if defined(HOST_WORDS_BIGENDIAN)
1491 #define VEXTRACT(suffix, element) \
1492 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1493 { \
1494 uint32_t es = sizeof(r->element[0]); \
1495 memmove(&r->u8[8 - es], &b->u8[index], es); \
1496 memset(&r->u8[8], 0, 8); \
1497 memset(&r->u8[0], 0, 8 - es); \
1498 }
1499 #else
1500 #define VEXTRACT(suffix, element) \
1501 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1502 { \
1503 uint32_t es = sizeof(r->element[0]); \
1504 uint32_t s = (16 - index) - es; \
1505 memmove(&r->u8[8], &b->u8[s], es); \
1506 memset(&r->u8[0], 0, 8); \
1507 memset(&r->u8[8 + es], 0, 8 - es); \
1508 }
1509 #endif
1510 VEXTRACT(ub, u8)
1511 VEXTRACT(uh, u16)
1512 VEXTRACT(uw, u32)
1513 VEXTRACT(d, u64)
1514 #undef VEXTRACT
1515
1516 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1517 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1518 { \
1519 int i, idx, crf = 0; \
1520 \
1521 for (i = 0; i < NUM_ELEMS; i++) { \
1522 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1523 if (b->Vsr##ELEM(idx)) { \
1524 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1525 } else { \
1526 crf = 0b0010; \
1527 break; \
1528 } \
1529 } \
1530 \
1531 for (; i < NUM_ELEMS; i++) { \
1532 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1533 t->Vsr##ELEM(idx) = 0; \
1534 } \
1535 \
1536 return crf; \
1537 }
1538 VSTRI(VSTRIBL, B, 16, true)
1539 VSTRI(VSTRIBR, B, 16, false)
1540 VSTRI(VSTRIHL, H, 8, true)
1541 VSTRI(VSTRIHR, H, 8, false)
1542 #undef VSTRI
1543
1544 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1545 ppc_vsr_t *xb, uint32_t index)
1546 {
1547 ppc_vsr_t t = { };
1548 size_t es = sizeof(uint32_t);
1549 uint32_t ext_index;
1550 int i;
1551
1552 ext_index = index;
1553 for (i = 0; i < es; i++, ext_index++) {
1554 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1555 }
1556
1557 *xt = t;
1558 }
1559
1560 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1561 ppc_vsr_t *xb, uint32_t index)
1562 {
1563 ppc_vsr_t t = *xt;
1564 size_t es = sizeof(uint32_t);
1565 int ins_index, i = 0;
1566
1567 ins_index = index;
1568 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1569 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1570 }
1571
1572 *xt = t;
1573 }
1574
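/*
 * XXBLENDV[BHWD]: for each element, select the element from 'b' when the sign
 * (most-significant) bit of the corresponding element of 'c' is set, and the
 * element from 'a' otherwise.
 */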
1575 #define XXBLEND(name, sz) \
1576 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1577 ppc_avr_t *c, uint32_t desc) \
1578 { \
1579 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1580 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1581 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1582 } \
1583 }
1584 XXBLEND(B, 8)
1585 XXBLEND(H, 16)
1586 XXBLEND(W, 32)
1587 XXBLEND(D, 64)
1588 #undef XXBLEND
1589
1590 #define VNEG(name, element) \
1591 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1592 { \
1593 int i; \
1594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1595 r->element[i] = -b->element[i]; \
1596 } \
1597 }
1598 VNEG(vnegw, s32)
1599 VNEG(vnegd, s64)
1600 #undef VNEG
1601
1602 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1603 {
1604 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1605
1606 #if defined(HOST_WORDS_BIGENDIAN)
1607 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1608 memset(&r->u8[0], 0, sh);
1609 #else
1610 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1611 memset(&r->u8[16 - sh], 0, sh);
1612 #endif
1613 }
1614
1615 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1616 {
1617 int i;
1618
1619 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1620 r->u32[i] = a->u32[i] >= b->u32[i];
1621 }
1622 }
1623
1624 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1625 {
1626 int64_t t;
1627 int i, upper;
1628 ppc_avr_t result;
1629 int sat = 0;
1630
1631 upper = ARRAY_SIZE(r->s32) - 1;
1632 t = (int64_t)b->VsrSW(upper);
1633 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1634 t += a->VsrSW(i);
1635 result.VsrSW(i) = 0;
1636 }
1637 result.VsrSW(upper) = cvtsdsw(t, &sat);
1638 *r = result;
1639
1640 if (sat) {
1641 set_vscr_sat(env);
1642 }
1643 }
1644
1645 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1646 {
1647 int i, j, upper;
1648 ppc_avr_t result;
1649 int sat = 0;
1650
1651 upper = 1;
1652 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1653 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1654
1655 result.VsrD(i) = 0;
1656 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1657 t += a->VsrSW(2 * i + j);
1658 }
1659 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1660 }
1661
1662 *r = result;
1663 if (sat) {
1664 set_vscr_sat(env);
1665 }
1666 }
1667
1668 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1669 {
1670 int i, j;
1671 int sat = 0;
1672
1673 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1674 int64_t t = (int64_t)b->s32[i];
1675
1676 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1677 t += a->s8[4 * i + j];
1678 }
1679 r->s32[i] = cvtsdsw(t, &sat);
1680 }
1681
1682 if (sat) {
1683 set_vscr_sat(env);
1684 }
1685 }
1686
1687 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1688 {
1689 int sat = 0;
1690 int i;
1691
1692 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1693 int64_t t = (int64_t)b->s32[i];
1694
1695 t += a->s16[2 * i] + a->s16[2 * i + 1];
1696 r->s32[i] = cvtsdsw(t, &sat);
1697 }
1698
1699 if (sat) {
1700 set_vscr_sat(env);
1701 }
1702 }
1703
1704 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1705 {
1706 int i, j;
1707 int sat = 0;
1708
1709 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1710 uint64_t t = (uint64_t)b->u32[i];
1711
1712 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1713 t += a->u8[4 * i + j];
1714 }
1715 r->u32[i] = cvtuduw(t, &sat);
1716 }
1717
1718 if (sat) {
1719 set_vscr_sat(env);
1720 }
1721 }
1722
1723 #if defined(HOST_WORDS_BIGENDIAN)
1724 #define UPKHI 1
1725 #define UPKLO 0
1726 #else
1727 #define UPKHI 0
1728 #define UPKLO 1
1729 #endif
1730 #define VUPKPX(suffix, hi) \
1731 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1732 { \
1733 int i; \
1734 ppc_avr_t result; \
1735 \
1736 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1737 uint16_t e = b->u16[hi ? i : i + 4]; \
1738 uint8_t a = (e >> 15) ? 0xff : 0; \
1739 uint8_t r = (e >> 10) & 0x1f; \
1740 uint8_t g = (e >> 5) & 0x1f; \
1741 uint8_t b = e & 0x1f; \
1742 \
1743 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1744 } \
1745 *r = result; \
1746 }
1747 VUPKPX(lpx, UPKLO)
1748 VUPKPX(hpx, UPKHI)
1749 #undef VUPKPX
1750
1751 #define VUPK(suffix, unpacked, packee, hi) \
1752 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1753 { \
1754 int i; \
1755 ppc_avr_t result; \
1756 \
1757 if (hi) { \
1758 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1759 result.unpacked[i] = b->packee[i]; \
1760 } \
1761 } else { \
1762 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1763 i++) { \
1764 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1765 } \
1766 } \
1767 *r = result; \
1768 }
1769 VUPK(hsb, s16, s8, UPKHI)
1770 VUPK(hsh, s32, s16, UPKHI)
1771 VUPK(hsw, s64, s32, UPKHI)
1772 VUPK(lsb, s16, s8, UPKLO)
1773 VUPK(lsh, s32, s16, UPKLO)
1774 VUPK(lsw, s64, s32, UPKLO)
1775 #undef VUPK
1776 #undef UPKHI
1777 #undef UPKLO
1778
1779 #define VGENERIC_DO(name, element) \
1780 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1781 { \
1782 int i; \
1783 \
1784 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1785 r->element[i] = name(b->element[i]); \
1786 } \
1787 }
1788
1789 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1790 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1791
1792 VGENERIC_DO(clzb, u8)
1793 VGENERIC_DO(clzh, u16)
1794
1795 #undef clzb
1796 #undef clzh
1797
1798 #define ctzb(v) ((v) ? ctz32(v) : 8)
1799 #define ctzh(v) ((v) ? ctz32(v) : 16)
1800 #define ctzw(v) ctz32((v))
1801 #define ctzd(v) ctz64((v))
1802
1803 VGENERIC_DO(ctzb, u8)
1804 VGENERIC_DO(ctzh, u16)
1805 VGENERIC_DO(ctzw, u32)
1806 VGENERIC_DO(ctzd, u64)
1807
1808 #undef ctzb
1809 #undef ctzh
1810 #undef ctzw
1811 #undef ctzd
1812
1813 #define popcntb(v) ctpop8(v)
1814 #define popcnth(v) ctpop16(v)
1815 #define popcntw(v) ctpop32(v)
1816 #define popcntd(v) ctpop64(v)
1817
1818 VGENERIC_DO(popcntb, u8)
1819 VGENERIC_DO(popcnth, u16)
1820 VGENERIC_DO(popcntw, u32)
1821 VGENERIC_DO(popcntd, u64)
1822
1823 #undef popcntb
1824 #undef popcnth
1825 #undef popcntw
1826 #undef popcntd
1827
1828 #undef VGENERIC_DO
1829
1830 #if defined(HOST_WORDS_BIGENDIAN)
1831 #define QW_ONE { .u64 = { 0, 1 } }
1832 #else
1833 #define QW_ONE { .u64 = { 1, 0 } }
1834 #endif
1835
1836 #ifndef CONFIG_INT128
1837
1838 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1839 {
1840 t->u64[0] = ~a.u64[0];
1841 t->u64[1] = ~a.u64[1];
1842 }
1843
1844 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1845 {
1846 if (a.VsrD(0) < b.VsrD(0)) {
1847 return -1;
1848 } else if (a.VsrD(0) > b.VsrD(0)) {
1849 return 1;
1850 } else if (a.VsrD(1) < b.VsrD(1)) {
1851 return -1;
1852 } else if (a.VsrD(1) > b.VsrD(1)) {
1853 return 1;
1854 } else {
1855 return 0;
1856 }
1857 }
1858
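/*
 * 128-bit addition on two 64-bit halves: '~a.VsrD(1) < b.VsrD(1)' is true
 * exactly when a.VsrD(1) + b.VsrD(1) overflows, i.e. when the low doubleword
 * addition carries out into the high doubleword.
 */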
1859 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1860 {
1861 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1862 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1863 (~a.VsrD(1) < b.VsrD(1));
1864 }
1865
1866 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1867 {
1868 ppc_avr_t not_a;
1869 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1870 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1871 (~a.VsrD(1) < b.VsrD(1));
1872 avr_qw_not(&not_a, a);
1873 return avr_qw_cmpu(not_a, b) < 0;
1874 }
1875
1876 #endif
1877
1878 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1879 {
1880 #ifdef CONFIG_INT128
1881 r->u128 = a->u128 + b->u128;
1882 #else
1883 avr_qw_add(r, *a, *b);
1884 #endif
1885 }
1886
1887 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1888 {
1889 #ifdef CONFIG_INT128
1890 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1891 #else
1892
1893 if (c->VsrD(1) & 1) {
1894 ppc_avr_t tmp;
1895
1896 tmp.VsrD(0) = 0;
1897 tmp.VsrD(1) = c->VsrD(1) & 1;
1898 avr_qw_add(&tmp, *a, tmp);
1899 avr_qw_add(r, tmp, *b);
1900 } else {
1901 avr_qw_add(r, *a, *b);
1902 }
1903 #endif
1904 }
1905
1906 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1907 {
1908 #ifdef CONFIG_INT128
1909 r->u128 = (~a->u128 < b->u128);
1910 #else
1911 ppc_avr_t not_a;
1912
1913 avr_qw_not(&not_a, *a);
1914
1915 r->VsrD(0) = 0;
1916 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1917 #endif
1918 }
1919
1920 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1921 {
1922 #ifdef CONFIG_INT128
1923 int carry_out = (~a->u128 < b->u128);
1924 if (!carry_out && (c->u128 & 1)) {
1925 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1926 ((a->u128 != 0) || (b->u128 != 0));
1927 }
1928 r->u128 = carry_out;
1929 #else
1930
1931 int carry_in = c->VsrD(1) & 1;
1932 int carry_out = 0;
1933 ppc_avr_t tmp;
1934
1935 carry_out = avr_qw_addc(&tmp, *a, *b);
1936
1937 if (!carry_out && carry_in) {
1938 ppc_avr_t one = QW_ONE;
1939 carry_out = avr_qw_addc(&tmp, tmp, one);
1940 }
1941 r->VsrD(0) = 0;
1942 r->VsrD(1) = carry_out;
1943 #endif
1944 }
1945
1946 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1947 {
1948 #ifdef CONFIG_INT128
1949 r->u128 = a->u128 - b->u128;
1950 #else
1951 ppc_avr_t tmp;
1952 ppc_avr_t one = QW_ONE;
1953
1954 avr_qw_not(&tmp, *b);
1955 avr_qw_add(&tmp, *a, tmp);
1956 avr_qw_add(r, tmp, one);
1957 #endif
1958 }
1959
1960 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1961 {
1962 #ifdef CONFIG_INT128
1963 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1964 #else
1965 ppc_avr_t tmp, sum;
1966
1967 avr_qw_not(&tmp, *b);
1968 avr_qw_add(&sum, *a, tmp);
1969
1970 tmp.VsrD(0) = 0;
1971 tmp.VsrD(1) = c->VsrD(1) & 1;
1972 avr_qw_add(r, sum, tmp);
1973 #endif
1974 }
1975
1976 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1977 {
1978 #ifdef CONFIG_INT128
1979 r->u128 = (~a->u128 < ~b->u128) ||
1980 (a->u128 + ~b->u128 == (__uint128_t)-1);
1981 #else
1982 int carry = (avr_qw_cmpu(*a, *b) > 0);
1983 if (!carry) {
1984 ppc_avr_t tmp;
1985 avr_qw_not(&tmp, *b);
1986 avr_qw_add(&tmp, *a, tmp);
1987 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
1988 }
1989 r->VsrD(0) = 0;
1990 r->VsrD(1) = carry;
1991 #endif
1992 }
1993
1994 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1995 {
1996 #ifdef CONFIG_INT128
1997 r->u128 =
1998 (~a->u128 < ~b->u128) ||
1999 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2000 #else
2001 int carry_in = c->VsrD(1) & 1;
2002 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2003 if (!carry_out && carry_in) {
2004 ppc_avr_t tmp;
2005 avr_qw_not(&tmp, *b);
2006 avr_qw_add(&tmp, *a, tmp);
2007 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2008 }
2009
2010 r->VsrD(0) = 0;
2011 r->VsrD(1) = carry_out;
2012 #endif
2013 }
2014
2015 #define BCD_PLUS_PREF_1 0xC
2016 #define BCD_PLUS_PREF_2 0xF
2017 #define BCD_PLUS_ALT_1 0xA
2018 #define BCD_NEG_PREF 0xD
2019 #define BCD_NEG_ALT 0xB
2020 #define BCD_PLUS_ALT_2 0xE
2021 #define NATIONAL_PLUS 0x2B
2022 #define NATIONAL_NEG 0x2D
2023
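/*
 * Packed BCD layout: digit position 0 is the sign nibble, held in the low
 * nibble of the least-significant byte; digits 1..31 follow toward the
 * most-significant byte, two digits per byte. BCD_DIG_BYTE(n) gives the
 * VsrB() index of the byte holding digit 'n'.
 */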
2024 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2025
2026 static int bcd_get_sgn(ppc_avr_t *bcd)
2027 {
2028 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2029 case BCD_PLUS_PREF_1:
2030 case BCD_PLUS_PREF_2:
2031 case BCD_PLUS_ALT_1:
2032 case BCD_PLUS_ALT_2:
2033 {
2034 return 1;
2035 }
2036
2037 case BCD_NEG_PREF:
2038 case BCD_NEG_ALT:
2039 {
2040 return -1;
2041 }
2042
2043 default:
2044 {
2045 return 0;
2046 }
2047 }
2048 }
2049
2050 static int bcd_preferred_sgn(int sgn, int ps)
2051 {
2052 if (sgn >= 0) {
2053 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2054 } else {
2055 return BCD_NEG_PREF;
2056 }
2057 }
2058
2059 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2060 {
2061 uint8_t result;
2062 if (n & 1) {
2063 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2064 } else {
2065 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2066 }
2067
2068 if (unlikely(result > 9)) {
2069 *invalid = true;
2070 }
2071 return result;
2072 }
2073
2074 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2075 {
2076 if (n & 1) {
2077 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2078 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2079 } else {
2080 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2081 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2082 }
2083 }
2084
2085 static bool bcd_is_valid(ppc_avr_t *bcd)
2086 {
2087 int i;
2088 int invalid = 0;
2089
2090 if (bcd_get_sgn(bcd) == 0) {
2091 return false;
2092 }
2093
2094 for (i = 1; i < 32; i++) {
2095 bcd_get_digit(bcd, i, &invalid);
2096 if (unlikely(invalid)) {
2097 return false;
2098 }
2099 }
2100 return true;
2101 }
2102
2103 static int bcd_cmp_zero(ppc_avr_t *bcd)
2104 {
2105 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2106 return CRF_EQ;
2107 } else {
2108 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2109 }
2110 }
2111
2112 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2113 {
2114 return reg->VsrH(7 - n);
2115 }
2116
2117 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2118 {
2119 reg->VsrH(7 - n) = val;
2120 }
2121
2122 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2123 {
2124 int i;
2125 int invalid = 0;
2126 for (i = 31; i > 0; i--) {
2127 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2128 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2129 if (unlikely(invalid)) {
2130 return 0; /* doesn't matter */
2131 } else if (dig_a > dig_b) {
2132 return 1;
2133 } else if (dig_a < dig_b) {
2134 return -1;
2135 }
2136 }
2137
2138 return 0;
2139 }
2140
2141 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2142 int *overflow)
2143 {
2144 int carry = 0;
2145 int i;
2146 int is_zero = 1;
2147
2148 for (i = 1; i <= 31; i++) {
2149 uint8_t digit = bcd_get_digit(a, i, invalid) +
2150 bcd_get_digit(b, i, invalid) + carry;
2151 is_zero &= (digit == 0);
2152 if (digit > 9) {
2153 carry = 1;
2154 digit -= 10;
2155 } else {
2156 carry = 0;
2157 }
2158
2159 bcd_put_digit(t, digit, i);
2160 }
2161
2162 *overflow = carry;
2163 return is_zero;
2164 }
2165
2166 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2167 int *overflow)
2168 {
2169 int carry = 0;
2170 int i;
2171
2172 for (i = 1; i <= 31; i++) {
2173 uint8_t digit = bcd_get_digit(a, i, invalid) -
2174 bcd_get_digit(b, i, invalid) + carry;
2175 if (digit & 0x80) {
2176 carry = -1;
2177 digit += 10;
2178 } else {
2179 carry = 0;
2180 }
2181
2182 bcd_put_digit(t, digit, i);
2183 }
2184
2185 *overflow = carry;
2186 }
2187
2188 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2189 {
2190
2191 int sgna = bcd_get_sgn(a);
2192 int sgnb = bcd_get_sgn(b);
2193 int invalid = (sgna == 0) || (sgnb == 0);
2194 int overflow = 0;
2195 int zero = 0;
2196 uint32_t cr = 0;
2197 ppc_avr_t result = { .u64 = { 0, 0 } };
2198
2199 if (!invalid) {
2200 if (sgna == sgnb) {
2201 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2202 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2203 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2204 } else {
2205 int magnitude = bcd_cmp_mag(a, b);
2206 if (magnitude > 0) {
2207 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2208 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2209 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2210 } else if (magnitude < 0) {
2211 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2212 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2213 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2214 } else {
2215 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2216 cr = CRF_EQ;
2217 }
2218 }
2219 }
2220
2221 if (unlikely(invalid)) {
2222 result.VsrD(0) = result.VsrD(1) = -1;
2223 cr = CRF_SO;
2224 } else if (overflow) {
2225 cr |= CRF_SO;
2226 } else if (zero) {
2227 cr |= CRF_EQ;
2228 }
2229
2230 *r = result;
2231
2232 return cr;
2233 }
2234
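/*
 * bcdsub. is implemented by flipping the sign code of the second operand
 * and reusing the bcdadd. logic; an invalid sign code is left untouched
 * so that helper_bcdadd() reports it as SO.
 */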
2235 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2236 {
2237 ppc_avr_t bcopy = *b;
2238 int sgnb = bcd_get_sgn(b);
2239 if (sgnb < 0) {
2240 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2241 } else if (sgnb > 0) {
2242 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2243 }
2244 /* else invalid ... defer to bcdadd code for proper handling */
2245
2246 return helper_bcdadd(r, a, &bcopy, ps);
2247 }
2248
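/*
 * bcdcfn. (Decimal Convert From National): the source holds seven 16-bit
 * digit characters ('0'..'9', i.e. 0x0030..0x0039) followed by a
 * trailing sign halfword (NATIONAL_PLUS or NATIONAL_NEG); the result is
 * signed packed decimal with the plus-sign encoding selected by PS.
 */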
2249 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2250 {
2251 int i;
2252 int cr = 0;
2253 uint16_t national = 0;
2254 uint16_t sgnb = get_national_digit(b, 0);
2255 ppc_avr_t ret = { .u64 = { 0, 0 } };
2256 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2257
2258 for (i = 1; i < 8; i++) {
2259 national = get_national_digit(b, i);
2260 if (unlikely(national < 0x30 || national > 0x39)) {
2261 invalid = 1;
2262 break;
2263 }
2264
2265 bcd_put_digit(&ret, national & 0xf, i);
2266 }
2267
2268 if (sgnb == NATIONAL_PLUS) {
2269 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2270 } else {
2271 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2272 }
2273
2274 cr = bcd_cmp_zero(&ret);
2275
2276 if (unlikely(invalid)) {
2277 cr = CRF_SO;
2278 }
2279
2280 *r = ret;
2281
2282 return cr;
2283 }
2284
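/*
 * bcdctn. (Decimal Convert To National): only the low seven packed
 * digits fit in the national format, so SO is raised when any higher
 * digit of the source is non-zero.
 */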
2285 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2286 {
2287 int i;
2288 int cr = 0;
2289 int sgnb = bcd_get_sgn(b);
2290 int invalid = (sgnb == 0);
2291 ppc_avr_t ret = { .u64 = { 0, 0 } };
2292
2293 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2294
2295 for (i = 1; i < 8; i++) {
2296 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2297
2298 if (unlikely(invalid)) {
2299 break;
2300 }
2301 }
2302 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2303
2304 cr = bcd_cmp_zero(b);
2305
2306 if (ox_flag) {
2307 cr |= CRF_SO;
2308 }
2309
2310 if (unlikely(invalid)) {
2311 cr = CRF_SO;
2312 }
2313
2314 *r = ret;
2315
2316 return cr;
2317 }
2318
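/*
 * bcdcfz. (Decimal Convert From Zoned): sixteen source bytes, each a
 * zone nibble (0x3 for PS=0, 0xF for PS=1) above a digit nibble.  The
 * zone nibble of the rightmost byte carries the sign instead and is
 * decoded according to PS.
 */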
2319 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2320 {
2321 int i;
2322 int cr = 0;
2323 int invalid = 0;
2324 int zone_digit = 0;
2325 int zone_lead = ps ? 0xF : 0x3;
2326 int digit = 0;
2327 ppc_avr_t ret = { .u64 = { 0, 0 } };
2328 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2329
2330 if (unlikely((sgnb < 0xA) && ps)) {
2331 invalid = 1;
2332 }
2333
2334 for (i = 0; i < 16; i++) {
2335 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2336 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2337 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2338 invalid = 1;
2339 break;
2340 }
2341
2342 bcd_put_digit(&ret, digit, i + 1);
2343 }
2344
2345 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2346 (!ps && (sgnb & 0x4))) {
2347 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2348 } else {
2349 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2350 }
2351
2352 cr = bcd_cmp_zero(&ret);
2353
2354 if (unlikely(invalid)) {
2355 cr = CRF_SO;
2356 }
2357
2358 *r = ret;
2359
2360 return cr;
2361 }
2362
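/*
 * bcdctz. (Decimal Convert To Zoned): sixteen digits fit, so SO is
 * raised when digits 17..31 of the source are non-zero; the sign is
 * folded into the zone nibble of the rightmost byte.
 */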
2363 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2364 {
2365 int i;
2366 int cr = 0;
2367 uint8_t digit = 0;
2368 int sgnb = bcd_get_sgn(b);
2369 int zone_lead = (ps) ? 0xF0 : 0x30;
2370 int invalid = (sgnb == 0);
2371 ppc_avr_t ret = { .u64 = { 0, 0 } };
2372
2373 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2374
2375 for (i = 0; i < 16; i++) {
2376 digit = bcd_get_digit(b, i + 1, &invalid);
2377
2378 if (unlikely(invalid)) {
2379 break;
2380 }
2381
2382 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2383 }
2384
2385 if (ps) {
2386 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2387 } else {
2388 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2389 }
2390
2391 cr = bcd_cmp_zero(b);
2392
2393 if (ox_flag) {
2394 cr |= CRF_SO;
2395 }
2396
2397 if (unlikely(invalid)) {
2398 cr = CRF_SO;
2399 }
2400
2401 *r = ret;
2402
2403 return cr;
2404 }
2405
2406 /**
2407 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2408 *
2409 * Returns:
2410 * > 0 if ahi|alo > bhi|blo,
2411 * 0 if ahi|alo == bhi|blo,
2412 * < 0 if ahi|alo < bhi|blo
2413 */
2414 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2415 uint64_t blo, uint64_t bhi)
2416 {
2417 return (ahi == bhi) ?
2418 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2419 (ahi > bhi ? 1 : -1);
2420 }
2421
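/*
 * bcdcfsq. (Decimal Convert From Signed Quadword): the magnitude is
 * split with a single divu128() by 10^15, the remainder supplying
 * digits 1..15 and the quotient digits 16..31.  Values whose magnitude
 * exceeds 10^31 - 1 cannot be represented and only set SO.
 */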
2422 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2423 {
2424 int i;
2425 int cr;
2426 uint64_t lo_value;
2427 uint64_t hi_value;
2428 uint64_t rem;
2429 ppc_avr_t ret = { .u64 = { 0, 0 } };
2430
2431 if (b->VsrSD(0) < 0) {
2432 lo_value = -b->VsrSD(1);
2433 hi_value = ~b->VsrD(0) + !lo_value;
2434 bcd_put_digit(&ret, 0xD, 0);
2435
2436 cr = CRF_LT;
2437 } else {
2438 lo_value = b->VsrD(1);
2439 hi_value = b->VsrD(0);
2440 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2441
2442 if (hi_value == 0 && lo_value == 0) {
2443 cr = CRF_EQ;
2444 } else {
2445 cr = CRF_GT;
2446 }
2447 }
2448
2449 /*
2450 * Check src limits: abs(src) <= 10^31 - 1
2451 *
2452 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2453 */
2454 if (ucmp128(lo_value, hi_value,
2455 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2456 cr |= CRF_SO;
2457
2458 /*
2459 * According to the ISA, if src wouldn't fit in the destination
2460 * register, the result is undefined.
2461 * In that case, we leave r unchanged.
2462 */
2463 } else {
2464 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2465
2466 for (i = 1; i < 16; rem /= 10, i++) {
2467 bcd_put_digit(&ret, rem % 10, i);
2468 }
2469
2470 for (; i < 32; lo_value /= 10, i++) {
2471 bcd_put_digit(&ret, lo_value % 10, i);
2472 }
2473
2474 *r = ret;
2475 }
2476
2477 return cr;
2478 }
2479
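/*
 * bcdctsq. (Decimal Convert To Signed Quadword): Horner evaluation of
 * the 31 digits.  The running 128-bit value is multiplied by 10 with
 * mulu64() on the low half, whose carry is added into the high half; a
 * valid 31-digit magnitude stays below 2^103, so the high-half multiply
 * cannot overflow.
 */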
2480 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2481 {
2482 uint8_t i;
2483 int cr;
2484 uint64_t carry;
2485 uint64_t unused;
2486 uint64_t lo_value;
2487 uint64_t hi_value = 0;
2488 int sgnb = bcd_get_sgn(b);
2489 int invalid = (sgnb == 0);
2490
2491 lo_value = bcd_get_digit(b, 31, &invalid);
2492 for (i = 30; i > 0; i--) {
2493 mulu64(&lo_value, &carry, lo_value, 10ULL);
2494 mulu64(&hi_value, &unused, hi_value, 10ULL);
2495 lo_value += bcd_get_digit(b, i, &invalid);
2496 hi_value += carry;
2497
2498 if (unlikely(invalid)) {
2499 break;
2500 }
2501 }
2502
2503 if (sgnb == -1) {
2504 r->VsrSD(1) = -lo_value;
2505 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2506 } else {
2507 r->VsrSD(1) = lo_value;
2508 r->VsrSD(0) = hi_value;
2509 }
2510
2511 cr = bcd_cmp_zero(b);
2512
2513 if (unlikely(invalid)) {
2514 cr = CRF_SO;
2515 }
2516
2517 return cr;
2518 }
2519
2520 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2521 {
2522 int i;
2523 int invalid = 0;
2524
2525 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2526 return CRF_SO;
2527 }
2528
2529 *r = *a;
2530 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2531
2532 for (i = 1; i < 32; i++) {
2533 bcd_get_digit(a, i, &invalid);
2534 bcd_get_digit(b, i, &invalid);
2535 if (unlikely(invalid)) {
2536 return CRF_SO;
2537 }
2538 }
2539
2540 return bcd_cmp_zero(r);
2541 }
2542
2543 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2544 {
2545 int sgnb = bcd_get_sgn(b);
2546
2547 *r = *b;
2548 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2549
2550 if (bcd_is_valid(b) == false) {
2551 return CRF_SO;
2552 }
2553
2554 return bcd_cmp_zero(r);
2555 }
2556
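/*
 * bcds. (Decimal Shift): the signed shift count comes from byte 7 of
 * the first operand and is clamped to +/-31 digits; positive counts
 * shift towards the most significant digit, four bits per digit.  The
 * sign nibble is cleared before shifting and rewritten with the
 * preferred sign afterwards; SO reports bits lost on a left shift.
 */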
2557 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2558 {
2559 int cr;
2560 int i = a->VsrSB(7);
2561 bool ox_flag = false;
2562 int sgnb = bcd_get_sgn(b);
2563 ppc_avr_t ret = *b;
2564 ret.VsrD(1) &= ~0xf;
2565
2566 if (bcd_is_valid(b) == false) {
2567 return CRF_SO;
2568 }
2569
2570 if (unlikely(i > 31)) {
2571 i = 31;
2572 } else if (unlikely(i < -31)) {
2573 i = -31;
2574 }
2575
2576 if (i > 0) {
2577 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2578 } else {
2579 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2580 }
2581 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2582
2583 *r = ret;
2584
2585 cr = bcd_cmp_zero(r);
2586 if (ox_flag) {
2587 cr |= CRF_SO;
2588 }
2589
2590 return cr;
2591 }
2592
2593 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2594 {
2595 int cr;
2596 int i;
2597 int invalid = 0;
2598 bool ox_flag = false;
2599 ppc_avr_t ret = *b;
2600
2601 for (i = 0; i < 32; i++) {
2602 bcd_get_digit(b, i, &invalid);
2603
2604 if (unlikely(invalid)) {
2605 return CRF_SO;
2606 }
2607 }
2608
2609 i = a->VsrSB(7);
2610 if (i >= 32) {
2611 ox_flag = true;
2612 ret.VsrD(1) = ret.VsrD(0) = 0;
2613 } else if (i <= -32) {
2614 ret.VsrD(1) = ret.VsrD(0) = 0;
2615 } else if (i > 0) {
2616 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2617 } else {
2618 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2619 }
2620 *r = ret;
2621
2622 cr = bcd_cmp_zero(r);
2623 if (ox_flag) {
2624 cr |= CRF_SO;
2625 }
2626
2627 return cr;
2628 }
2629
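/*
 * bcdsr. (Decimal Shift and Round): like bcds., except that a right
 * shift rounds the result.  After shifting, the most significant of the
 * discarded digits lands in the cleared sign-nibble slot; if it is 5 or
 * more, a packed-decimal 1 is added to the magnitude.
 */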
2630 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2631 {
2632 int cr;
2633 int unused = 0;
2634 int invalid = 0;
2635 bool ox_flag = false;
2636 int sgnb = bcd_get_sgn(b);
2637 ppc_avr_t ret = *b;
2638 ret.VsrD(1) &= ~0xf;
2639
2640 int i = a->VsrSB(7);
2641 ppc_avr_t bcd_one;
2642
2643 bcd_one.VsrD(0) = 0;
2644 bcd_one.VsrD(1) = 0x10;
2645
2646 if (bcd_is_valid(b) == false) {
2647 return CRF_SO;
2648 }
2649
2650 if (unlikely(i > 31)) {
2651 i = 31;
2652 } else if (unlikely(i < -31)) {
2653 i = -31;
2654 }
2655
2656 if (i > 0) {
2657 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2658 } else {
2659 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2660
2661 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2662 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2663 }
2664 }
2665 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2666
2667 cr = bcd_cmp_zero(&ret);
2668 if (ox_flag) {
2669 cr |= CRF_SO;
2670 }
2671 *r = ret;
2672
2673 return cr;
2674 }
2675
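/*
 * bcdtrunc. (Decimal Truncate): keeps the number of digits given by
 * halfword 3 of the first operand.  The "+ 1" accounts for the sign
 * nibble, so a count of N preserves the sign plus the low N digits; SO
 * reports any non-zero digit that was dropped.
 */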
2676 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2677 {
2678 uint64_t mask;
2679 uint32_t ox_flag = 0;
2680 int i = a->VsrSH(3) + 1;
2681 ppc_avr_t ret = *b;
2682
2683 if (bcd_is_valid(b) == false) {
2684 return CRF_SO;
2685 }
2686
2687 if (i > 16 && i < 32) {
2688 mask = (uint64_t)-1 >> (128 - i * 4);
2689 if (ret.VsrD(0) & ~mask) {
2690 ox_flag = CRF_SO;
2691 }
2692
2693 ret.VsrD(0) &= mask;
2694 } else if (i >= 0 && i <= 16) {
2695 mask = (uint64_t)-1 >> (64 - i * 4);
2696 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2697 ox_flag = CRF_SO;
2698 }
2699
2700 ret.VsrD(1) &= mask;
2701 ret.VsrD(0) = 0;
2702 }
2703 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2704 *r = ret;
2705
2706 return bcd_cmp_zero(&ret) | ox_flag;
2707 }
2708
2709 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2710 {
2711 int i;
2712 uint64_t mask;
2713 uint32_t ox_flag = 0;
2714 int invalid = 0;
2715 ppc_avr_t ret = *b;
2716
2717 for (i = 0; i < 32; i++) {
2718 bcd_get_digit(b, i, &invalid);
2719
2720 if (unlikely(invalid)) {
2721 return CRF_SO;
2722 }
2723 }
2724
2725 i = a->VsrSH(3);
2726 if (i > 16 && i < 33) {
2727 mask = (uint64_t)-1 >> (128 - i * 4);
2728 if (ret.VsrD(0) & ~mask) {
2729 ox_flag = CRF_SO;
2730 }
2731
2732 ret.VsrD(0) &= mask;
2733 } else if (i > 0 && i <= 16) {
2734 mask = (uint64_t)-1 >> (64 - i * 4);
2735 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2736 ox_flag = CRF_SO;
2737 }
2738
2739 ret.VsrD(1) &= mask;
2740 ret.VsrD(0) = 0;
2741 } else if (i == 0) {
2742 if (ret.VsrD(0) || ret.VsrD(1)) {
2743 ox_flag = CRF_SO;
2744 }
2745 ret.VsrD(0) = ret.VsrD(1) = 0;
2746 }
2747
2748 *r = ret;
2749 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2750 return ox_flag | CRF_EQ;
2751 }
2752
2753 return ox_flag | CRF_GT;
2754 }
2755
2756 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2757 {
2758 int i;
2759 VECTOR_FOR_INORDER_I(i, u8) {
2760 r->u8[i] = AES_sbox[a->u8[i]];
2761 }
2762 }
2763
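/*
 * vcipher: one full AES encryption round (SubBytes, ShiftRows and
 * MixColumns, folded into the combined AES_Te* lookup tables) followed
 * by the AddRoundKey xor with the second operand.
 */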
2764 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2765 {
2766 ppc_avr_t result;
2767 int i;
2768
2769 VECTOR_FOR_INORDER_I(i, u32) {
2770 result.VsrW(i) = b->VsrW(i) ^
2771 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2772 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2773 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2774 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2775 }
2776 *r = result;
2777 }
2778
2779 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2780 {
2781 ppc_avr_t result;
2782 int i;
2783
2784 VECTOR_FOR_INORDER_I(i, u8) {
2785 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2786 }
2787 *r = result;
2788 }
2789
2790 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2791 {
2792 /* This differs from what is written in ISA V2.07. The RTL is */
2793 /* incorrect and will be fixed in V2.07B. */
2794 int i;
2795 ppc_avr_t tmp;
2796
2797 VECTOR_FOR_INORDER_I(i, u8) {
2798 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2799 }
2800
2801 VECTOR_FOR_INORDER_I(i, u32) {
2802 r->VsrW(i) =
2803 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2804 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2805 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2806 AES_imc[tmp.VsrB(4 * i + 3)][3];
2807 }
2808 }
2809
2810 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2811 {
2812 ppc_avr_t result;
2813 int i;
2814
2815 VECTOR_FOR_INORDER_I(i, u8) {
2816 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2817 }
2818 *r = result;
2819 }
2820
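/*
 * vshasigmaw: SHA-256 sigma functions.  Bit 4 of st_six selects the
 * upper-case Sigma variants; each bit of the low nibble selects sigma1
 * (bit set) or sigma0 (bit clear) for the corresponding word.  The
 * rotate and shift amounts are the SHA-256 constants.
 */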
2821 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2822 {
2823 int st = (st_six & 0x10) != 0;
2824 int six = st_six & 0xF;
2825 int i;
2826
2827 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2828 if (st == 0) {
2829 if ((six & (0x8 >> i)) == 0) {
2830 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2831 ror32(a->VsrW(i), 18) ^
2832 (a->VsrW(i) >> 3);
2833 } else { /* six.bit[i] == 1 */
2834 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2835 ror32(a->VsrW(i), 19) ^
2836 (a->VsrW(i) >> 10);
2837 }
2838 } else { /* st == 1 */
2839 if ((six & (0x8 >> i)) == 0) {
2840 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2841 ror32(a->VsrW(i), 13) ^
2842 ror32(a->VsrW(i), 22);
2843 } else { /* six.bit[i] == 1 */
2844 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2845 ror32(a->VsrW(i), 11) ^
2846 ror32(a->VsrW(i), 25);
2847 }
2848 }
2849 }
2850 }
2851
2852 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2853 {
2854 int st = (st_six & 0x10) != 0;
2855 int six = st_six & 0xF;
2856 int i;
2857
2858 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2859 if (st == 0) {
2860 if ((six & (0x8 >> (2 * i))) == 0) {
2861 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2862 ror64(a->VsrD(i), 8) ^
2863 (a->VsrD(i) >> 7);
2864 } else { /* six.bit[2*i] == 1 */
2865 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2866 ror64(a->VsrD(i), 61) ^
2867 (a->VsrD(i) >> 6);
2868 }
2869 } else { /* st == 1 */
2870 if ((six & (0x8 >> (2 * i))) == 0) {
2871 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2872 ror64(a->VsrD(i), 34) ^
2873 ror64(a->VsrD(i), 39);
2874 } else { /* six.bit[2*i] == 1 */
2875 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2876 ror64(a->VsrD(i), 18) ^
2877 ror64(a->VsrD(i), 41);
2878 }
2879 }
2880 }
2881 }
2882
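/*
 * vpermxor: for each byte position, the high nibble of the control byte
 * in c selects a byte from a and the low nibble selects a byte from b;
 * the result byte is their xor.
 */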
2883 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2884 {
2885 ppc_avr_t result;
2886 int i;
2887
2888 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2889 int indexA = c->VsrB(i) >> 4;
2890 int indexB = c->VsrB(i) & 0xF;
2891
2892 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2893 }
2894 *r = result;
2895 }
2896
2897 #undef VECTOR_FOR_INORDER_I
2898
2899 /*****************************************************************************/
2900 /* SPE extension helpers */
2901 /* Table of bit-reversed nibbles, used to speed up byte_reverse() */
2902 static const uint8_t hbrev[16] = {
2903 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2904 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2905 };
2906
2907 static inline uint8_t byte_reverse(uint8_t val)
2908 {
2909 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2910 }
2911
2912 static inline uint32_t word_reverse(uint32_t val)
2913 {
2914 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2915 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2916 }
2917
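/*
 * brinc: bit-reversed increment, used for bit-reversed (FFT-style)
 * addressing.  The bits of arg1 selected by the mask in arg2 (within
 * the low MASKBITS bits) are incremented as if their bit order were
 * reversed; the remaining bits of arg1 pass through unchanged.
 */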
2918 #define MASKBITS 16 /* Arbitrary placeholder; the mask width is implementation dependent */
2919 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2920 {
2921 uint32_t a, b, d, mask;
2922
2923 mask = UINT32_MAX >> (32 - MASKBITS);
2924 a = arg1 & mask;
2925 b = arg2 & mask;
2926 d = word_reverse(1 + word_reverse(a | ~b));
2927 return (arg1 & ~mask) | (d & b);
2928 }
2929
2930 uint32_t helper_cntlsw32(uint32_t val)
2931 {
2932 if (val & 0x80000000) {
2933 return clz32(~val);
2934 } else {
2935 return clz32(val);
2936 }
2937 }
2938
2939 uint32_t helper_cntlzw32(uint32_t val)
2940 {
2941 return clz32(val);
2942 }
2943
2944 /* PowerPC 440 specific */
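/*
 * dlmzb: scans the eight bytes of high:low for the leftmost zero byte
 * and returns its 1-based index (8 if there is none).  The count is
 * written to the low bits of XER and, when Rc is set, CR0 records which
 * half (if any) contained the zero byte, or'd with XER[SO].
 */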
2945 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2946 target_ulong low, uint32_t update_Rc)
2947 {
2948 target_ulong mask;
2949 int i;
2950
2951 i = 1;
2952 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2953 if ((high & mask) == 0) {
2954 if (update_Rc) {
2955 env->crf[0] = 0x4;
2956 }
2957 goto done;
2958 }
2959 i++;
2960 }
2961 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2962 if ((low & mask) == 0) {
2963 if (update_Rc) {
2964 env->crf[0] = 0x8;
2965 }
2966 goto done;
2967 }
2968 i++;
2969 }
2970 i = 8;
2971 if (update_Rc) {
2972 env->crf[0] = 0x2;
2973 }
2974 done:
2975 env->xer = (env->xer & ~0x7F) | i;
2976 if (update_Rc) {
2977 env->crf[0] |= xer_so;
2978 }
2979 return i;
2980 }