1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
35
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 {
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
42 }
43 }
44
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
47 {
48 uint64_t rt = 0;
49 int overflow = 0;
50
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
53
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
59 }
60
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
63 }
64
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
67 }
68
69 return (target_ulong)rt;
70 }
71
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
74 {
75 int64_t rt = 0;
76 int overflow = 0;
77
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
80
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
87 }
88
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
91 }
92
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
95 }
96
97 return (target_ulong)rt;
98 }
99
100 #if defined(TARGET_PPC64)
101
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 {
104 uint64_t rt = 0;
105 int overflow = 0;
106
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
112 }
113
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
116 }
117
118 return rt;
119 }
120
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 {
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
127
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
133 }
134
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
137 }
138
139 return rt;
140 }
141
142 #endif
143
144
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
148
149 /*
150 * Subtract 1 from each byte, AND with the inverse of the original value,
151 * then check whether the MSB of each byte is set.
152 * e.g. for a zero byte: ((0x00 - 0x01) & ~(0x00)) & 0x80
153 *                       = (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
154 */
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
156
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
159
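/*
 * Worked example (illustrative values): with ra = 0x2a and
 * rb = 0x112a334455667788 on a 64-bit target_ulong,
 *   rb ^ pattern(0x2a) = 0x3b00196e7f4c5da2
 * so the matching byte has become 0x00; haszero() then flags it through the
 * 0x80 bit of that byte and helper_cmpeqb() below returns CRF_GT.
 */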
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
161 {
162 return hasvalue(rb, ra) ? CRF_GT : 0;
163 }
164
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
168
169 /*
170 * Return a random number; on failure, return -1 (all ones) as darn does.
171 */
172 uint64_t helper_darn32(void)
173 {
174 Error *err = NULL;
175 uint32_t ret;
176
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
182 }
183
184 return ret;
185 }
186
187 uint64_t helper_darn64(void)
188 {
189 Error *err = NULL;
190 uint64_t ret;
191
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
197 }
198
199 return ret;
200 }
201
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
203 {
204 int i;
205 uint64_t ra = 0;
206
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
212 }
213 }
214 }
215 return ra;
216 }
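/*
 * Worked example (illustrative values): with rs = 0x0001020304050607 the
 * least-significant byte of rs is 0x07 and the most-significant is 0x00.
 * If rb = 0x8000000000000000, only PPC bit 0 (the MSB) is set, so only the
 * rs byte holding index 0 selects a set bit and the result is ra = 0x80.
 */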
217
218 #endif
219
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
221 {
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
225
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
229 }
230 mask <<= 8;
231 }
232 return ra;
233 }
234
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
238 {
239 int32_t ret;
240
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
249 }
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
253 }
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
257 }
258 return (target_long)ret;
259 }
260
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
264 {
265 int64_t ret;
266
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
275 }
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
279 }
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
283 }
284 return ret;
285 }
286 #endif
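/*
 * Note on the sraw/srad helpers above: CA and CA32 are set only when the
 * source value is negative and at least one 1 bit is shifted out, e.g.
 * shifting 0xFFFFFFFF right by 1 sets CA, while shifting 0x00000003 right
 * by 1 does not.
 */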
287
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
299 }
300
301 target_ulong helper_popcntw(target_ulong val)
302 {
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
315 }
316 #else
317 target_ulong helper_popcntb(target_ulong val)
318 {
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
324 }
325 #endif
326
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
328 {
329 /*
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
333 * ctz or cto, we negate the mask at the end of the loop.
334 */
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
338
339 if (mask == 0 || mask == -1) {
340 return src;
341 }
342
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
349 }
350
351 /*
352 * Extract the 'n' trailing bits of src and put them in the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
355 */
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
361 }
362
363 /*
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
368 */
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
374 }
375
376 /*
377 * At the end, 'right' has been rotated right by ctpop(mask) bits in total;
378 * to put those bits back in place we shift right by 64 - ctpop(mask) more.
379 */
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
384 }
385
386 return left | (right >> n);
387 }
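/*
 * Worked example of the centrifuge operation above, shown on 8 bits for
 * brevity (the helper itself operates on 64): with src = 0b10110010 and
 * mask = 0b11001100, the src bits under the 1 bits of mask (1,0,0,0 from
 * MSB to LSB) are gathered on the right and the bits under the 0 bits
 * (1,1,1,0) on the left, giving 0b11101000 = 0xe8.
 */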
388
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
390 {
391 int i, o;
392 uint64_t result = 0;
393
394 if (mask == -1) {
395 return src;
396 }
397
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
402 }
403
404 return result;
405 }
406
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
408 {
409 int i, o;
410 uint64_t result = 0;
411
412 if (mask == -1) {
413 return src;
414 }
415
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
420 }
421
422 return result;
423 }
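/*
 * Worked examples (illustrative values) for the two helpers above:
 * PDEPD scatters the low-order bits of src into the set positions of mask,
 * e.g. src = 0b11, mask = 0b1010 gives 0b1010; PEXTD is the inverse gather,
 * e.g. src = 0b1110, mask = 0b1010 picks bits 1 and 3 of src and packs them
 * into the low bits, giving 0b11.
 */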
424
425 /*****************************************************************************/
426 /* Altivec extension helpers */
427 #if defined(HOST_WORDS_BIGENDIAN)
428 #define VECTOR_FOR_INORDER_I(index, element) \
429 for (index = 0; index < ARRAY_SIZE(r->element); index++)
430 #else
431 #define VECTOR_FOR_INORDER_I(index, element) \
432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
433 #endif
434
435 /* Saturating arithmetic helpers. */
436 #define SATCVT(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
438 { \
439 to_type r; \
440 \
441 if (x < (from_type)min) { \
442 r = min; \
443 *sat = 1; \
444 } else if (x > (from_type)max) { \
445 r = max; \
446 *sat = 1; \
447 } else { \
448 r = x; \
449 } \
450 return r; \
451 }
452 #define SATCVTU(from, to, from_type, to_type, min, max) \
453 static inline to_type cvt##from##to(from_type x, int *sat) \
454 { \
455 to_type r; \
456 \
457 if (x > (from_type)max) { \
458 r = max; \
459 *sat = 1; \
460 } else { \
461 r = x; \
462 } \
463 return r; \
464 }
465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
468
469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
475 #undef SATCVT
476 #undef SATCVTU
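/*
 * Usage note: the generated converters are called as, e.g., cvtsdsw(x, &sat);
 * a value outside [INT32_MIN, INT32_MAX] is clamped to the nearest bound and
 * 'sat' is set, which callers below propagate to VSCR[SAT] via set_vscr_sat().
 */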
477
478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
479 {
480 ppc_store_vscr(env, vscr);
481 }
482
483 uint32_t helper_mfvscr(CPUPPCState *env)
484 {
485 return ppc_get_vscr(env);
486 }
487
488 static inline void set_vscr_sat(CPUPPCState *env)
489 {
490 /* The choice of non-zero value is arbitrary. */
491 env->vscr_sat.u32[0] = 1;
492 }
493
494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
495 {
496 int i;
497
498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
499 r->u32[i] = ~a->u32[i] < b->u32[i];
500 }
501 }
502
503 /* vprtybw */
504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
505 {
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
509 res ^= res >> 8;
510 r->u32[i] = res & 1;
511 }
512 }
513
514 /* vprtybd */
515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
516 {
517 int i;
518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->u64[i] = res & 1;
523 }
524 }
525
526 /* vprtybq */
527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
528 {
529 uint64_t res = b->u64[0] ^ b->u64[1];
530 res ^= res >> 32;
531 res ^= res >> 16;
532 res ^= res >> 8;
533 r->VsrD(1) = res & 1;
534 r->VsrD(0) = 0;
535 }
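/*
 * The three parity helpers above XOR-fold each element down to a single bit,
 * i.e. the parity of its population count: for example a doubleword of
 * 0x0101010101010101 (eight set bits) yields 0, while 0x1 yields 1.
 */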
536
537 #define VARITHFP(suffix, func) \
538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
540 { \
541 int i; \
542 \
543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
545 } \
546 }
547 VARITHFP(addfp, float32_add)
548 VARITHFP(subfp, float32_sub)
549 VARITHFP(minfp, float32_min)
550 VARITHFP(maxfp, float32_max)
551 #undef VARITHFP
552
553 #define VARITHFPFMA(suffix, type) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b, ppc_avr_t *c) \
556 { \
557 int i; \
558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
560 type, &env->vec_status); \
561 } \
562 }
563 VARITHFPFMA(maddfp, 0);
564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
565 #undef VARITHFPFMA
566
567 #define VARITHSAT_CASE(type, op, cvt, element) \
568 { \
569 type result = (type)a->element[i] op (type)b->element[i]; \
570 r->element[i] = cvt(result, &sat); \
571 }
572
573 #define VARITHSAT_DO(name, op, optype, cvt, element) \
574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
576 { \
577 int sat = 0; \
578 int i; \
579 \
580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
581 VARITHSAT_CASE(optype, op, cvt, element); \
582 } \
583 if (sat) { \
584 vscr_sat->u32[0] = 1; \
585 } \
586 }
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
600 #undef VARITHSAT_DO
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
603
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
606 { \
607 int i; \
608 \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
612 } \
613 }
614
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
616 unsigned_type) \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
622 #undef VAVG_DO
623 #undef VAVG
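/*
 * Note: the "+ 1" in VAVG_DO makes the average round up, as the vavg*
 * instructions require; e.g. averaging 1 and 2 gives (1 + 2 + 1) >> 1 = 2.
 */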
624
625 #define VABSDU_DO(name, element) \
626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
627 { \
628 int i; \
629 \
630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
631 r->element[i] = (a->element[i] > b->element[i]) ? \
632 (a->element[i] - b->element[i]) : \
633 (b->element[i] - a->element[i]); \
634 } \
635 }
636
637 /*
638 * VABSDU - Vector absolute difference unsigned
639 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
640 * element - element type to access from vector
641 */
642 #define VABSDU(type, element) \
643 VABSDU_DO(absdu##type, element)
644 VABSDU(b, u8)
645 VABSDU(h, u16)
646 VABSDU(w, u32)
647 #undef VABSDU_DO
648 #undef VABSDU
649
650 #define VCF(suffix, cvt, element) \
651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
652 ppc_avr_t *b, uint32_t uim) \
653 { \
654 int i; \
655 \
656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
657 float32 t = cvt(b->element[i], &env->vec_status); \
658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
659 } \
660 }
661 VCF(ux, uint32_to_float32, u32)
662 VCF(sx, int32_to_float32, s32)
663 #undef VCF
664
665 #define VCMPNEZ(NAME, ELEM) \
666 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
667 { \
668 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
669 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
670 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
671 } \
672 }
673 VCMPNEZ(VCMPNEZB, u8)
674 VCMPNEZ(VCMPNEZH, u16)
675 VCMPNEZ(VCMPNEZW, u32)
676 #undef VCMPNEZ
677
678 #define VCMPFP_DO(suffix, compare, order, record) \
679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
680 ppc_avr_t *a, ppc_avr_t *b) \
681 { \
682 uint32_t ones = (uint32_t)-1; \
683 uint32_t all = ones; \
684 uint32_t none = 0; \
685 int i; \
686 \
687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
688 uint32_t result; \
689 FloatRelation rel = \
690 float32_compare_quiet(a->f32[i], b->f32[i], \
691 &env->vec_status); \
692 if (rel == float_relation_unordered) { \
693 result = 0; \
694 } else if (rel compare order) { \
695 result = ones; \
696 } else { \
697 result = 0; \
698 } \
699 r->u32[i] = result; \
700 all &= result; \
701 none |= result; \
702 } \
703 if (record) { \
704 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
705 } \
706 }
707 #define VCMPFP(suffix, compare, order) \
708 VCMPFP_DO(suffix, compare, order, 0) \
709 VCMPFP_DO(suffix##_dot, compare, order, 1)
710 VCMPFP(eqfp, ==, float_relation_equal)
711 VCMPFP(gefp, !=, float_relation_less)
712 VCMPFP(gtfp, ==, float_relation_greater)
713 #undef VCMPFP_DO
714 #undef VCMPFP
715
716 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
717 ppc_avr_t *a, ppc_avr_t *b, int record)
718 {
719 int i;
720 int all_in = 0;
721
722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
723 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
724 &env->vec_status);
725 if (le_rel == float_relation_unordered) {
726 r->u32[i] = 0xc0000000;
727 all_in = 1;
728 } else {
729 float32 bneg = float32_chs(b->f32[i]);
730 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
731 &env->vec_status);
732 int le = le_rel != float_relation_greater;
733 int ge = ge_rel != float_relation_less;
734
735 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
736 all_in |= (!le | !ge);
737 }
738 }
739 if (record) {
740 env->crf[6] = (all_in == 0) << 1;
741 }
742 }
743
744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
745 {
746 vcmpbfp_internal(env, r, a, b, 0);
747 }
748
749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
750 ppc_avr_t *b)
751 {
752 vcmpbfp_internal(env, r, a, b, 1);
753 }
754
755 #define VCT(suffix, satcvt, element) \
756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
757 ppc_avr_t *b, uint32_t uim) \
758 { \
759 int i; \
760 int sat = 0; \
761 float_status s = env->vec_status; \
762 \
763 set_float_rounding_mode(float_round_to_zero, &s); \
764 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
765 if (float32_is_any_nan(b->f32[i])) { \
766 r->element[i] = 0; \
767 } else { \
768 float64 t = float32_to_float64(b->f32[i], &s); \
769 int64_t j; \
770 \
771 t = float64_scalbn(t, uim, &s); \
772 j = float64_to_int64(t, &s); \
773 r->element[i] = satcvt(j, &sat); \
774 } \
775 } \
776 if (sat) { \
777 set_vscr_sat(env); \
778 } \
779 }
780 VCT(uxs, cvtsduw, u32)
781 VCT(sxs, cvtsdsw, s32)
782 #undef VCT
783
784 target_ulong helper_vclzlsbb(ppc_avr_t *r)
785 {
786 target_ulong count = 0;
787 int i;
788 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
789 if (r->VsrB(i) & 0x01) {
790 break;
791 }
792 count++;
793 }
794 return count;
795 }
796
797 target_ulong helper_vctzlsbb(ppc_avr_t *r)
798 {
799 target_ulong count = 0;
800 int i;
801 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
802 if (r->VsrB(i) & 0x01) {
803 break;
804 }
805 count++;
806 }
807 return count;
808 }
809
810 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
811 ppc_avr_t *b, ppc_avr_t *c)
812 {
813 int sat = 0;
814 int i;
815
816 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
817 int32_t prod = a->s16[i] * b->s16[i];
818 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
819
820 r->s16[i] = cvtswsh(t, &sat);
821 }
822
823 if (sat) {
824 set_vscr_sat(env);
825 }
826 }
827
828 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
829 ppc_avr_t *b, ppc_avr_t *c)
830 {
831 int sat = 0;
832 int i;
833
834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
835 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
836 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
837 r->s16[i] = cvtswsh(t, &sat);
838 }
839
840 if (sat) {
841 set_vscr_sat(env);
842 }
843 }
844
845 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
846 {
847 int i;
848
849 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
850 int32_t prod = a->s16[i] * b->s16[i];
851 r->s16[i] = (int16_t) (prod + c->s16[i]);
852 }
853 }
854
855 #define VMRG_DO(name, element, access, ofs) \
856 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
857 { \
858 ppc_avr_t result; \
859 int i, half = ARRAY_SIZE(r->element) / 2; \
860 \
861 for (i = 0; i < half; i++) { \
862 result.access(i * 2 + 0) = a->access(i + ofs); \
863 result.access(i * 2 + 1) = b->access(i + ofs); \
864 } \
865 *r = result; \
866 }
867
868 #define VMRG(suffix, element, access) \
869 VMRG_DO(mrgl##suffix, element, access, half) \
870 VMRG_DO(mrgh##suffix, element, access, 0)
871 VMRG(b, u8, VsrB)
872 VMRG(h, u16, VsrH)
873 VMRG(w, u32, VsrW)
874 #undef VMRG_DO
875 #undef VMRG
876
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
879 {
880 int32_t prod[16];
881 int i;
882
883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884 prod[i] = (int32_t)a->s8[i] * b->u8[i];
885 }
886
887 VECTOR_FOR_INORDER_I(i, s32) {
888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889 prod[4 * i + 2] + prod[4 * i + 3];
890 }
891 }
892
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
895 {
896 int32_t prod[8];
897 int i;
898
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 prod[i] = a->s16[i] * b->s16[i];
901 }
902
903 VECTOR_FOR_INORDER_I(i, s32) {
904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
905 }
906 }
907
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909 ppc_avr_t *b, ppc_avr_t *c)
910 {
911 int32_t prod[8];
912 int i;
913 int sat = 0;
914
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 prod[i] = (int32_t)a->s16[i] * b->s16[i];
917 }
918
919 VECTOR_FOR_INORDER_I(i, s32) {
920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
921
922 r->u32[i] = cvtsdsw(t, &sat);
923 }
924
925 if (sat) {
926 set_vscr_sat(env);
927 }
928 }
929
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931 ppc_avr_t *b, ppc_avr_t *c)
932 {
933 uint16_t prod[16];
934 int i;
935
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 prod[i] = a->u8[i] * b->u8[i];
938 }
939
940 VECTOR_FOR_INORDER_I(i, u32) {
941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942 prod[4 * i + 2] + prod[4 * i + 3];
943 }
944 }
945
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947 ppc_avr_t *b, ppc_avr_t *c)
948 {
949 uint32_t prod[8];
950 int i;
951
952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953 prod[i] = a->u16[i] * b->u16[i];
954 }
955
956 VECTOR_FOR_INORDER_I(i, u32) {
957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
958 }
959 }
960
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
963 {
964 uint32_t prod[8];
965 int i;
966 int sat = 0;
967
968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969 prod[i] = a->u16[i] * b->u16[i];
970 }
971
972 VECTOR_FOR_INORDER_I(i, s32) {
973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
974
975 r->u32[i] = cvtuduw(t, &sat);
976 }
977
978 if (sat) {
979 set_vscr_sat(env);
980 }
981 }
982
983 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
984 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
985 { \
986 int i; \
987 \
988 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
989 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
990 (cast)b->mul_access(i); \
991 } \
992 }
993
994 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
995 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
996 { \
997 int i; \
998 \
999 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1000 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1001 (cast)b->mul_access(i + 1); \
1002 } \
1003 }
1004
1005 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1006 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1008 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1009 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1010 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1011 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1012 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1013 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1014 #undef VMUL_DO_EVN
1015 #undef VMUL_DO_ODD
1016 #undef VMUL
1017
1018 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1019 ppc_avr_t *c)
1020 {
1021 ppc_avr_t result;
1022 int i;
1023
1024 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1025 int s = c->VsrB(i) & 0x1f;
1026 int index = s & 0xf;
1027
1028 if (s & 0x10) {
1029 result.VsrB(i) = b->VsrB(index);
1030 } else {
1031 result.VsrB(i) = a->VsrB(index);
1032 }
1033 }
1034 *r = result;
1035 }
1036
1037 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1038 ppc_avr_t *c)
1039 {
1040 ppc_avr_t result;
1041 int i;
1042
1043 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1044 int s = c->VsrB(i) & 0x1f;
1045 int index = 15 - (s & 0xf);
1046
1047 if (s & 0x10) {
1048 result.VsrB(i) = a->VsrB(index);
1049 } else {
1050 result.VsrB(i) = b->VsrB(index);
1051 }
1052 }
1053 *r = result;
1054 }
1055
1056 #if defined(HOST_WORDS_BIGENDIAN)
1057 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1058 #define VBPERMD_INDEX(i) (i)
1059 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1060 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1061 #else
1062 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1063 #define VBPERMD_INDEX(i) (1 - i)
1064 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1065 #define EXTRACT_BIT(avr, i, index) \
1066 (extract64((avr)->u64[1 - i], 63 - index, 1))
1067 #endif
1068
1069 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1070 {
1071 int i, j;
1072 ppc_avr_t result = { .u64 = { 0, 0 } };
1073 VECTOR_FOR_INORDER_I(i, u64) {
1074 for (j = 0; j < 8; j++) {
1075 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1076 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1077 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1078 }
1079 }
1080 }
1081 *r = result;
1082 }
1083
1084 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1085 {
1086 int i;
1087 uint64_t perm = 0;
1088
1089 VECTOR_FOR_INORDER_I(i, u8) {
1090 int index = VBPERMQ_INDEX(b, i);
1091
1092 if (index < 128) {
1093 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1094 if (a->u64[VBPERMQ_DW(index)] & mask) {
1095 perm |= (0x8000 >> i);
1096 }
1097 }
1098 }
1099
1100 r->VsrD(0) = perm;
1101 r->VsrD(1) = 0;
1102 }
1103
1104 #undef VBPERMQ_INDEX
1105 #undef VBPERMQ_DW
1106
1107 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1108 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1109 { \
1110 int i, j; \
1111 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1112 \
1113 VECTOR_FOR_INORDER_I(i, srcfld) { \
1114 prod[i] = 0; \
1115 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1116 if (a->srcfld[i] & (1ull << j)) { \
1117 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1118 } \
1119 } \
1120 } \
1121 \
1122 VECTOR_FOR_INORDER_I(i, trgfld) { \
1123 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1124 } \
1125 }
1126
1127 PMSUM(vpmsumb, u8, u16, uint16_t)
1128 PMSUM(vpmsumh, u16, u32, uint32_t)
1129 PMSUM(vpmsumw, u32, u64, uint64_t)
1130
1131 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1132 {
1133
1134 #ifdef CONFIG_INT128
1135 int i, j;
1136 __uint128_t prod[2];
1137
1138 VECTOR_FOR_INORDER_I(i, u64) {
1139 prod[i] = 0;
1140 for (j = 0; j < 64; j++) {
1141 if (a->u64[i] & (1ull << j)) {
1142 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1143 }
1144 }
1145 }
1146
1147 r->u128 = prod[0] ^ prod[1];
1148
1149 #else
1150 int i, j;
1151 ppc_avr_t prod[2];
1152
1153 VECTOR_FOR_INORDER_I(i, u64) {
1154 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1155 for (j = 0; j < 64; j++) {
1156 if (a->u64[i] & (1ull << j)) {
1157 ppc_avr_t bshift;
1158 if (j == 0) {
1159 bshift.VsrD(0) = 0;
1160 bshift.VsrD(1) = b->u64[i];
1161 } else {
1162 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1163 bshift.VsrD(1) = b->u64[i] << j;
1164 }
1165 prod[i].VsrD(1) ^= bshift.VsrD(1);
1166 prod[i].VsrD(0) ^= bshift.VsrD(0);
1167 }
1168 }
1169 }
1170
1171 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1172 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1173 #endif
1174 }
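/*
 * Worked example (illustrative values): vpmsum* is a carry-less (GF(2))
 * multiply, so for a doubleword pair with a->u64[i] = 0b11 and
 * b->u64[i] = 0b11 the partial product is 0b11 ^ (0b11 << 1) = 0b101 rather
 * than 9; the two 128-bit partial products are then XOR-summed into r.
 */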
1175
1176
1177 #if defined(HOST_WORDS_BIGENDIAN)
1178 #define PKBIG 1
1179 #else
1180 #define PKBIG 0
1181 #endif
1182 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1183 {
1184 int i, j;
1185 ppc_avr_t result;
1186 #if defined(HOST_WORDS_BIGENDIAN)
1187 const ppc_avr_t *x[2] = { a, b };
1188 #else
1189 const ppc_avr_t *x[2] = { b, a };
1190 #endif
1191
1192 VECTOR_FOR_INORDER_I(i, u64) {
1193 VECTOR_FOR_INORDER_I(j, u32) {
1194 uint32_t e = x[i]->u32[j];
1195
1196 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1197 ((e >> 6) & 0x3e0) |
1198 ((e >> 3) & 0x1f));
1199 }
1200 }
1201 *r = result;
1202 }
1203
1204 #define VPK(suffix, from, to, cvt, dosat) \
1205 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1206 ppc_avr_t *a, ppc_avr_t *b) \
1207 { \
1208 int i; \
1209 int sat = 0; \
1210 ppc_avr_t result; \
1211 ppc_avr_t *a0 = PKBIG ? a : b; \
1212 ppc_avr_t *a1 = PKBIG ? b : a; \
1213 \
1214 VECTOR_FOR_INORDER_I(i, from) { \
1215 result.to[i] = cvt(a0->from[i], &sat); \
1216 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1217 } \
1218 *r = result; \
1219 if (dosat && sat) { \
1220 set_vscr_sat(env); \
1221 } \
1222 }
1223 #define I(x, y) (x)
1224 VPK(shss, s16, s8, cvtshsb, 1)
1225 VPK(shus, s16, u8, cvtshub, 1)
1226 VPK(swss, s32, s16, cvtswsh, 1)
1227 VPK(swus, s32, u16, cvtswuh, 1)
1228 VPK(sdss, s64, s32, cvtsdsw, 1)
1229 VPK(sdus, s64, u32, cvtsduw, 1)
1230 VPK(uhus, u16, u8, cvtuhub, 1)
1231 VPK(uwus, u32, u16, cvtuwuh, 1)
1232 VPK(udus, u64, u32, cvtuduw, 1)
1233 VPK(uhum, u16, u8, I, 0)
1234 VPK(uwum, u32, u16, I, 0)
1235 VPK(udum, u64, u32, I, 0)
1236 #undef I
1237 #undef VPK
1238 #undef PKBIG
1239
1240 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1241 {
1242 int i;
1243
1244 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1245 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1246 }
1247 }
1248
1249 #define VRFI(suffix, rounding) \
1250 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1251 ppc_avr_t *b) \
1252 { \
1253 int i; \
1254 float_status s = env->vec_status; \
1255 \
1256 set_float_rounding_mode(rounding, &s); \
1257 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1258 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1259 } \
1260 }
1261 VRFI(n, float_round_nearest_even)
1262 VRFI(m, float_round_down)
1263 VRFI(p, float_round_up)
1264 VRFI(z, float_round_to_zero)
1265 #undef VRFI
1266
1267 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1268 {
1269 int i;
1270
1271 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1272 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1273
1274 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1275 }
1276 }
1277
1278 #define VRLMI(name, size, element, insert) \
1279 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1280 { \
1281 int i; \
1282 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1283 uint##size##_t src1 = a->element[i]; \
1284 uint##size##_t src2 = b->element[i]; \
1285 uint##size##_t src3 = r->element[i]; \
1286 uint##size##_t begin, end, shift, mask, rot_val; \
1287 \
1288 shift = extract##size(src2, 0, 6); \
1289 end = extract##size(src2, 8, 6); \
1290 begin = extract##size(src2, 16, 6); \
1291 rot_val = rol##size(src1, shift); \
1292 mask = mask_u##size(begin, end); \
1293 if (insert) { \
1294 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1295 } else { \
1296 r->element[i] = (rot_val & mask); \
1297 } \
1298 } \
1299 }
1300
1301 VRLMI(VRLDMI, 64, u64, 1);
1302 VRLMI(VRLWMI, 32, u32, 1);
1303 VRLMI(VRLDNM, 64, u64, 0);
1304 VRLMI(VRLWNM, 32, u32, 0);
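/*
 * Field layout used above, as seen by extract32/extract64 (LSB-based bit
 * numbering): the rotate count comes from bits 0..5 of src2, the mask end
 * from bits 8..13 and the mask begin from bits 16..21. The *MI forms merge
 * the rotated value into the previous destination contents under that mask,
 * while the *NM forms simply AND the rotated value with the mask.
 */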
1305
1306 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1307 ppc_avr_t *c)
1308 {
1309 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1310 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1311 }
1312
1313 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1314 {
1315 int i;
1316
1317 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1318 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1319 }
1320 }
1321
1322 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1323 {
1324 int i;
1325
1326 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1327 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1328 }
1329 }
1330
1331 #define VEXTU_X_DO(name, size, left) \
1332 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1333 { \
1334 int index = (a & 0xf) * 8; \
1335 if (left) { \
1336 index = 128 - index - size; \
1337 } \
1338 return int128_getlo(int128_rshift(b->s128, index)) & \
1339 MAKE_64BIT_MASK(0, size); \
1340 }
1341 VEXTU_X_DO(vextublx, 8, 1)
1342 VEXTU_X_DO(vextuhlx, 16, 1)
1343 VEXTU_X_DO(vextuwlx, 32, 1)
1344 VEXTU_X_DO(vextubrx, 8, 0)
1345 VEXTU_X_DO(vextuhrx, 16, 0)
1346 VEXTU_X_DO(vextuwrx, 32, 0)
1347 #undef VEXTU_X_DO
1348
1349 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1350 {
1351 int i;
1352 unsigned int shift, bytes, size;
1353
1354 size = ARRAY_SIZE(r->u8);
1355 for (i = 0; i < size; i++) {
1356 shift = b->VsrB(i) & 0x7; /* extract shift value */
1357 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1358 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1359 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1360 }
1361 }
1362
1363 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1364 {
1365 int i;
1366 unsigned int shift, bytes;
1367
1368 /*
1369 * Process in reverse order, as the destination and source registers can
1370 * be the same. The register is modified in place (saving a temporary),
1371 * so reverse order guarantees that no computed result is fed back in.
1372 */
1373 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1374 shift = b->VsrB(i) & 0x7; /* extract shift value */
1375 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1376 /* extract adjacent bytes */
1377 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1378 }
1379 }
1380
1381 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1382 {
1383 int sh = shift & 0xf;
1384 int i;
1385 ppc_avr_t result;
1386
1387 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1388 int index = sh + i;
1389 if (index > 0xf) {
1390 result.VsrB(i) = b->VsrB(index - 0x10);
1391 } else {
1392 result.VsrB(i) = a->VsrB(index);
1393 }
1394 }
1395 *r = result;
1396 }
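/*
 * Worked example: with shift = 3 the result is bytes 3..15 of 'a' followed
 * by bytes 0..2 of 'b', i.e. the 16-byte window starting 3 bytes into the
 * concatenation a || b (element order as seen through VsrB()).
 */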
1397
1398 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1399 {
1400 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1401
1402 #if defined(HOST_WORDS_BIGENDIAN)
1403 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1404 memset(&r->u8[16 - sh], 0, sh);
1405 #else
1406 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1407 memset(&r->u8[0], 0, sh);
1408 #endif
1409 }
1410
1411 #if defined(HOST_WORDS_BIGENDIAN)
1412 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1413 #else
1414 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1415 #endif
1416
1417 #define VINSX(SUFFIX, TYPE) \
1418 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1419 uint64_t val, target_ulong index) \
1420 { \
1421 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1422 target_long idx = index; \
1423 \
1424 if (idx < 0 || idx > maxidx) { \
1425 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1426 qemu_log_mask(LOG_GUEST_ERROR, \
1427 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1428 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1429 } else { \
1430 TYPE src = val; \
1431 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1432 } \
1433 }
1434 VINSX(B, uint8_t)
1435 VINSX(H, uint16_t)
1436 VINSX(W, uint32_t)
1437 VINSX(D, uint64_t)
1438 #undef ELEM_ADDR
1439 #undef VINSX
1440 #if defined(HOST_WORDS_BIGENDIAN)
1441 #define VEXTDVLX(NAME, SIZE) \
1442 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1443 target_ulong index) \
1444 { \
1445 const target_long idx = index; \
1446 ppc_avr_t tmp[2] = { *a, *b }; \
1447 memset(t, 0, sizeof(*t)); \
1448 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1449 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1450 } else { \
1451 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1452 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1453 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1454 } \
1455 }
1456 #else
1457 #define VEXTDVLX(NAME, SIZE) \
1458 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1459 target_ulong index) \
1460 { \
1461 const target_long idx = index; \
1462 ppc_avr_t tmp[2] = { *b, *a }; \
1463 memset(t, 0, sizeof(*t)); \
1464 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1465 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1466 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1467 } else { \
1468 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1469 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1470 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1471 } \
1472 }
1473 #endif
1474 VEXTDVLX(VEXTDUBVLX, 1)
1475 VEXTDVLX(VEXTDUHVLX, 2)
1476 VEXTDVLX(VEXTDUWVLX, 4)
1477 VEXTDVLX(VEXTDDVLX, 8)
1478 #undef VEXTDVLX
1479 #if defined(HOST_WORDS_BIGENDIAN)
1480 #define VEXTRACT(suffix, element) \
1481 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1482 { \
1483 uint32_t es = sizeof(r->element[0]); \
1484 memmove(&r->u8[8 - es], &b->u8[index], es); \
1485 memset(&r->u8[8], 0, 8); \
1486 memset(&r->u8[0], 0, 8 - es); \
1487 }
1488 #else
1489 #define VEXTRACT(suffix, element) \
1490 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1491 { \
1492 uint32_t es = sizeof(r->element[0]); \
1493 uint32_t s = (16 - index) - es; \
1494 memmove(&r->u8[8], &b->u8[s], es); \
1495 memset(&r->u8[0], 0, 8); \
1496 memset(&r->u8[8 + es], 0, 8 - es); \
1497 }
1498 #endif
1499 VEXTRACT(ub, u8)
1500 VEXTRACT(uh, u16)
1501 VEXTRACT(uw, u32)
1502 VEXTRACT(d, u64)
1503 #undef VEXTRACT
1504
1505 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1506 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1507 { \
1508 int i, idx, crf = 0; \
1509 \
1510 for (i = 0; i < NUM_ELEMS; i++) { \
1511 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1512 if (b->Vsr##ELEM(idx)) { \
1513 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1514 } else { \
1515 crf = 0b0010; \
1516 break; \
1517 } \
1518 } \
1519 \
1520 for (; i < NUM_ELEMS; i++) { \
1521 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1522 t->Vsr##ELEM(idx) = 0; \
1523 } \
1524 \
1525 return crf; \
1526 }
1527 VSTRI(VSTRIBL, B, 16, true)
1528 VSTRI(VSTRIBR, B, 16, false)
1529 VSTRI(VSTRIHL, H, 8, true)
1530 VSTRI(VSTRIHR, H, 8, false)
1531 #undef VSTRI
1532
1533 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1534 ppc_vsr_t *xb, uint32_t index)
1535 {
1536 ppc_vsr_t t = { };
1537 size_t es = sizeof(uint32_t);
1538 uint32_t ext_index;
1539 int i;
1540
1541 ext_index = index;
1542 for (i = 0; i < es; i++, ext_index++) {
1543 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1544 }
1545
1546 *xt = t;
1547 }
1548
1549 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1550 ppc_vsr_t *xb, uint32_t index)
1551 {
1552 ppc_vsr_t t = *xt;
1553 size_t es = sizeof(uint32_t);
1554 int ins_index, i = 0;
1555
1556 ins_index = index;
1557 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1558 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1559 }
1560
1561 *xt = t;
1562 }
1563
1564 #define XXBLEND(name, sz) \
1565 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1566 ppc_avr_t *c, uint32_t desc) \
1567 { \
1568 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1569 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1570 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1571 } \
1572 }
1573 XXBLEND(B, 8)
1574 XXBLEND(H, 16)
1575 XXBLEND(W, 32)
1576 XXBLEND(D, 64)
1577 #undef XXBLEND
1578
1579 #define VNEG(name, element) \
1580 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1581 { \
1582 int i; \
1583 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1584 r->element[i] = -b->element[i]; \
1585 } \
1586 }
1587 VNEG(vnegw, s32)
1588 VNEG(vnegd, s64)
1589 #undef VNEG
1590
1591 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1592 {
1593 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1594
1595 #if defined(HOST_WORDS_BIGENDIAN)
1596 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1597 memset(&r->u8[0], 0, sh);
1598 #else
1599 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1600 memset(&r->u8[16 - sh], 0, sh);
1601 #endif
1602 }
1603
1604 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1605 {
1606 int i;
1607
1608 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1609 r->u32[i] = a->u32[i] >= b->u32[i];
1610 }
1611 }
1612
1613 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1614 {
1615 int64_t t;
1616 int i, upper;
1617 ppc_avr_t result;
1618 int sat = 0;
1619
1620 upper = ARRAY_SIZE(r->s32) - 1;
1621 t = (int64_t)b->VsrSW(upper);
1622 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1623 t += a->VsrSW(i);
1624 result.VsrSW(i) = 0;
1625 }
1626 result.VsrSW(upper) = cvtsdsw(t, &sat);
1627 *r = result;
1628
1629 if (sat) {
1630 set_vscr_sat(env);
1631 }
1632 }
1633
1634 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1635 {
1636 int i, j, upper;
1637 ppc_avr_t result;
1638 int sat = 0;
1639
1640 upper = 1;
1641 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1642 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1643
1644 result.VsrD(i) = 0;
1645 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1646 t += a->VsrSW(2 * i + j);
1647 }
1648 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1649 }
1650
1651 *r = result;
1652 if (sat) {
1653 set_vscr_sat(env);
1654 }
1655 }
1656
1657 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1658 {
1659 int i, j;
1660 int sat = 0;
1661
1662 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1663 int64_t t = (int64_t)b->s32[i];
1664
1665 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1666 t += a->s8[4 * i + j];
1667 }
1668 r->s32[i] = cvtsdsw(t, &sat);
1669 }
1670
1671 if (sat) {
1672 set_vscr_sat(env);
1673 }
1674 }
1675
1676 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1677 {
1678 int sat = 0;
1679 int i;
1680
1681 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1682 int64_t t = (int64_t)b->s32[i];
1683
1684 t += a->s16[2 * i] + a->s16[2 * i + 1];
1685 r->s32[i] = cvtsdsw(t, &sat);
1686 }
1687
1688 if (sat) {
1689 set_vscr_sat(env);
1690 }
1691 }
1692
1693 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1694 {
1695 int i, j;
1696 int sat = 0;
1697
1698 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1699 uint64_t t = (uint64_t)b->u32[i];
1700
1701 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1702 t += a->u8[4 * i + j];
1703 }
1704 r->u32[i] = cvtuduw(t, &sat);
1705 }
1706
1707 if (sat) {
1708 set_vscr_sat(env);
1709 }
1710 }
1711
1712 #if defined(HOST_WORDS_BIGENDIAN)
1713 #define UPKHI 1
1714 #define UPKLO 0
1715 #else
1716 #define UPKHI 0
1717 #define UPKLO 1
1718 #endif
1719 #define VUPKPX(suffix, hi) \
1720 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1721 { \
1722 int i; \
1723 ppc_avr_t result; \
1724 \
1725 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1726 uint16_t e = b->u16[hi ? i : i + 4]; \
1727 uint8_t a = (e >> 15) ? 0xff : 0; \
1728 uint8_t r = (e >> 10) & 0x1f; \
1729 uint8_t g = (e >> 5) & 0x1f; \
1730 uint8_t b = e & 0x1f; \
1731 \
1732 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1733 } \
1734 *r = result; \
1735 }
1736 VUPKPX(lpx, UPKLO)
1737 VUPKPX(hpx, UPKHI)
1738 #undef VUPKPX
1739
1740 #define VUPK(suffix, unpacked, packee, hi) \
1741 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1742 { \
1743 int i; \
1744 ppc_avr_t result; \
1745 \
1746 if (hi) { \
1747 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1748 result.unpacked[i] = b->packee[i]; \
1749 } \
1750 } else { \
1751 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1752 i++) { \
1753 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1754 } \
1755 } \
1756 *r = result; \
1757 }
1758 VUPK(hsb, s16, s8, UPKHI)
1759 VUPK(hsh, s32, s16, UPKHI)
1760 VUPK(hsw, s64, s32, UPKHI)
1761 VUPK(lsb, s16, s8, UPKLO)
1762 VUPK(lsh, s32, s16, UPKLO)
1763 VUPK(lsw, s64, s32, UPKLO)
1764 #undef VUPK
1765 #undef UPKHI
1766 #undef UPKLO
1767
1768 #define VGENERIC_DO(name, element) \
1769 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1770 { \
1771 int i; \
1772 \
1773 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1774 r->element[i] = name(b->element[i]); \
1775 } \
1776 }
1777
1778 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1779 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1780
1781 VGENERIC_DO(clzb, u8)
1782 VGENERIC_DO(clzh, u16)
1783
1784 #undef clzb
1785 #undef clzh
1786
1787 #define ctzb(v) ((v) ? ctz32(v) : 8)
1788 #define ctzh(v) ((v) ? ctz32(v) : 16)
1789 #define ctzw(v) ctz32((v))
1790 #define ctzd(v) ctz64((v))
1791
1792 VGENERIC_DO(ctzb, u8)
1793 VGENERIC_DO(ctzh, u16)
1794 VGENERIC_DO(ctzw, u32)
1795 VGENERIC_DO(ctzd, u64)
1796
1797 #undef ctzb
1798 #undef ctzh
1799 #undef ctzw
1800 #undef ctzd
1801
1802 #define popcntb(v) ctpop8(v)
1803 #define popcnth(v) ctpop16(v)
1804 #define popcntw(v) ctpop32(v)
1805 #define popcntd(v) ctpop64(v)
1806
1807 VGENERIC_DO(popcntb, u8)
1808 VGENERIC_DO(popcnth, u16)
1809 VGENERIC_DO(popcntw, u32)
1810 VGENERIC_DO(popcntd, u64)
1811
1812 #undef popcntb
1813 #undef popcnth
1814 #undef popcntw
1815 #undef popcntd
1816
1817 #undef VGENERIC_DO
1818
1819 #if defined(HOST_WORDS_BIGENDIAN)
1820 #define QW_ONE { .u64 = { 0, 1 } }
1821 #else
1822 #define QW_ONE { .u64 = { 1, 0 } }
1823 #endif
1824
1825 #ifndef CONFIG_INT128
1826
1827 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1828 {
1829 t->u64[0] = ~a.u64[0];
1830 t->u64[1] = ~a.u64[1];
1831 }
1832
1833 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1834 {
1835 if (a.VsrD(0) < b.VsrD(0)) {
1836 return -1;
1837 } else if (a.VsrD(0) > b.VsrD(0)) {
1838 return 1;
1839 } else if (a.VsrD(1) < b.VsrD(1)) {
1840 return -1;
1841 } else if (a.VsrD(1) > b.VsrD(1)) {
1842 return 1;
1843 } else {
1844 return 0;
1845 }
1846 }
1847
1848 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1849 {
1850 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1851 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1852 (~a.VsrD(1) < b.VsrD(1));
1853 }
1854
1855 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1856 {
1857 ppc_avr_t not_a;
1858 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1859 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1860 (~a.VsrD(1) < b.VsrD(1));
1861 avr_qw_not(&not_a, a);
1862 return avr_qw_cmpu(not_a, b) < 0;
1863 }
1864
1865 #endif
1866
1867 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1868 {
1869 #ifdef CONFIG_INT128
1870 r->u128 = a->u128 + b->u128;
1871 #else
1872 avr_qw_add(r, *a, *b);
1873 #endif
1874 }
1875
1876 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1877 {
1878 #ifdef CONFIG_INT128
1879 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1880 #else
1881
1882 if (c->VsrD(1) & 1) {
1883 ppc_avr_t tmp;
1884
1885 tmp.VsrD(0) = 0;
1886 tmp.VsrD(1) = c->VsrD(1) & 1;
1887 avr_qw_add(&tmp, *a, tmp);
1888 avr_qw_add(r, tmp, *b);
1889 } else {
1890 avr_qw_add(r, *a, *b);
1891 }
1892 #endif
1893 }
1894
1895 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1896 {
1897 #ifdef CONFIG_INT128
1898 r->u128 = (~a->u128 < b->u128);
1899 #else
1900 ppc_avr_t not_a;
1901
1902 avr_qw_not(&not_a, *a);
1903
1904 r->VsrD(0) = 0;
1905 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1906 #endif
1907 }
1908
1909 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1910 {
1911 #ifdef CONFIG_INT128
1912 int carry_out = (~a->u128 < b->u128);
1913 if (!carry_out && (c->u128 & 1)) {
1914 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1915 ((a->u128 != 0) || (b->u128 != 0));
1916 }
1917 r->u128 = carry_out;
1918 #else
1919
1920 int carry_in = c->VsrD(1) & 1;
1921 int carry_out = 0;
1922 ppc_avr_t tmp;
1923
1924 carry_out = avr_qw_addc(&tmp, *a, *b);
1925
1926 if (!carry_out && carry_in) {
1927 ppc_avr_t one = QW_ONE;
1928 carry_out = avr_qw_addc(&tmp, tmp, one);
1929 }
1930 r->VsrD(0) = 0;
1931 r->VsrD(1) = carry_out;
1932 #endif
1933 }
1934
1935 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1936 {
1937 #ifdef CONFIG_INT128
1938 r->u128 = a->u128 - b->u128;
1939 #else
1940 ppc_avr_t tmp;
1941 ppc_avr_t one = QW_ONE;
1942
1943 avr_qw_not(&tmp, *b);
1944 avr_qw_add(&tmp, *a, tmp);
1945 avr_qw_add(r, tmp, one);
1946 #endif
1947 }
1948
1949 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1950 {
1951 #ifdef CONFIG_INT128
1952 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1953 #else
1954 ppc_avr_t tmp, sum;
1955
1956 avr_qw_not(&tmp, *b);
1957 avr_qw_add(&sum, *a, tmp);
1958
1959 tmp.VsrD(0) = 0;
1960 tmp.VsrD(1) = c->VsrD(1) & 1;
1961 avr_qw_add(r, sum, tmp);
1962 #endif
1963 }
1964
1965 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1966 {
1967 #ifdef CONFIG_INT128
1968 r->u128 = (~a->u128 < ~b->u128) ||
1969 (a->u128 + ~b->u128 == (__uint128_t)-1);
1970 #else
1971 int carry = (avr_qw_cmpu(*a, *b) > 0);
1972 if (!carry) {
1973 ppc_avr_t tmp;
1974 avr_qw_not(&tmp, *b);
1975 avr_qw_add(&tmp, *a, tmp);
1976 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
1977 }
1978 r->VsrD(0) = 0;
1979 r->VsrD(1) = carry;
1980 #endif
1981 }
1982
1983 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1984 {
1985 #ifdef CONFIG_INT128
1986 r->u128 =
1987 (~a->u128 < ~b->u128) ||
1988 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
1989 #else
1990 int carry_in = c->VsrD(1) & 1;
1991 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
1992 if (!carry_out && carry_in) {
1993 ppc_avr_t tmp;
1994 avr_qw_not(&tmp, *b);
1995 avr_qw_add(&tmp, *a, tmp);
1996 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
1997 }
1998
1999 r->VsrD(0) = 0;
2000 r->VsrD(1) = carry_out;
2001 #endif
2002 }
2003
2004 #define BCD_PLUS_PREF_1 0xC
2005 #define BCD_PLUS_PREF_2 0xF
2006 #define BCD_PLUS_ALT_1 0xA
2007 #define BCD_NEG_PREF 0xD
2008 #define BCD_NEG_ALT 0xB
2009 #define BCD_PLUS_ALT_2 0xE
2010 #define NATIONAL_PLUS 0x2B
2011 #define NATIONAL_NEG 0x2D
2012
2013 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2014
2015 static int bcd_get_sgn(ppc_avr_t *bcd)
2016 {
2017 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2018 case BCD_PLUS_PREF_1:
2019 case BCD_PLUS_PREF_2:
2020 case BCD_PLUS_ALT_1:
2021 case BCD_PLUS_ALT_2:
2022 {
2023 return 1;
2024 }
2025
2026 case BCD_NEG_PREF:
2027 case BCD_NEG_ALT:
2028 {
2029 return -1;
2030 }
2031
2032 default:
2033 {
2034 return 0;
2035 }
2036 }
2037 }
2038
2039 static int bcd_preferred_sgn(int sgn, int ps)
2040 {
2041 if (sgn >= 0) {
2042 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2043 } else {
2044 return BCD_NEG_PREF;
2045 }
2046 }
2047
2048 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2049 {
2050 uint8_t result;
2051 if (n & 1) {
2052 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2053 } else {
2054 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2055 }
2056
2057 if (unlikely(result > 9)) {
2058 *invalid = true;
2059 }
2060 return result;
2061 }
2062
2063 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2064 {
2065 if (n & 1) {
2066 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2067 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2068 } else {
2069 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2070 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2071 }
2072 }
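/*
 * Layout note for the BCD helpers: digit 0 is the sign nibble, stored in the
 * low nibble of VsrB(15) (BCD_DIG_BYTE(0) == 15), and digits 1..31 are the
 * decimal digits in increasing order of significance. For example, +123 is
 * encoded as ...0000123C, so bcd_get_digit(.., 1, ..) returns 3 and
 * bcd_get_digit(.., 3, ..) returns 1.
 */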
2073
2074 static bool bcd_is_valid(ppc_avr_t *bcd)
2075 {
2076 int i;
2077 int invalid = 0;
2078
2079 if (bcd_get_sgn(bcd) == 0) {
2080 return false;
2081 }
2082
2083 for (i = 1; i < 32; i++) {
2084 bcd_get_digit(bcd, i, &invalid);
2085 if (unlikely(invalid)) {
2086 return false;
2087 }
2088 }
2089 return true;
2090 }
2091
2092 static int bcd_cmp_zero(ppc_avr_t *bcd)
2093 {
2094 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2095 return CRF_EQ;
2096 } else {
2097 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2098 }
2099 }
2100
2101 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2102 {
2103 return reg->VsrH(7 - n);
2104 }
2105
2106 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2107 {
2108 reg->VsrH(7 - n) = val;
2109 }
2110
2111 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2112 {
2113 int i;
2114 int invalid = 0;
2115 for (i = 31; i > 0; i--) {
2116 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2117 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2118 if (unlikely(invalid)) {
2119 return 0; /* doesn't matter */
2120 } else if (dig_a > dig_b) {
2121 return 1;
2122 } else if (dig_a < dig_b) {
2123 return -1;
2124 }
2125 }
2126
2127 return 0;
2128 }
2129
2130 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2131 int *overflow)
2132 {
2133 int carry = 0;
2134 int i;
2135 int is_zero = 1;
2136
2137 for (i = 1; i <= 31; i++) {
2138 uint8_t digit = bcd_get_digit(a, i, invalid) +
2139 bcd_get_digit(b, i, invalid) + carry;
2140 is_zero &= (digit == 0);
2141 if (digit > 9) {
2142 carry = 1;
2143 digit -= 10;
2144 } else {
2145 carry = 0;
2146 }
2147
2148 bcd_put_digit(t, digit, i);
2149 }
2150
2151 *overflow = carry;
2152 return is_zero;
2153 }
2154
2155 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2156 int *overflow)
2157 {
2158 int carry = 0;
2159 int i;
2160
2161 for (i = 1; i <= 31; i++) {
2162 uint8_t digit = bcd_get_digit(a, i, invalid) -
2163 bcd_get_digit(b, i, invalid) + carry;
2164 if (digit & 0x80) {
2165 carry = -1;
2166 digit += 10;
2167 } else {
2168 carry = 0;
2169 }
2170
2171 bcd_put_digit(t, digit, i);
2172 }
2173
2174 *overflow = carry;
2175 }
2176
2177 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2178 {
2179
2180 int sgna = bcd_get_sgn(a);
2181 int sgnb = bcd_get_sgn(b);
2182 int invalid = (sgna == 0) || (sgnb == 0);
2183 int overflow = 0;
2184 int zero = 0;
2185 uint32_t cr = 0;
2186 ppc_avr_t result = { .u64 = { 0, 0 } };
2187
2188 if (!invalid) {
2189 if (sgna == sgnb) {
2190 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2191 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2192 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2193 } else {
2194 int magnitude = bcd_cmp_mag(a, b);
2195 if (magnitude > 0) {
2196 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2197 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2198 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2199 } else if (magnitude < 0) {
2200 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2201 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2202 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2203 } else {
2204 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2205 cr = CRF_EQ;
2206 }
2207 }
2208 }
2209
2210 if (unlikely(invalid)) {
2211 result.VsrD(0) = result.VsrD(1) = -1;
2212 cr = CRF_SO;
2213 } else if (overflow) {
2214 cr |= CRF_SO;
2215 } else if (zero) {
2216 cr |= CRF_EQ;
2217 }
2218
2219 *r = result;
2220
2221 return cr;
2222 }
2223
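/* bcdsub. (Decimal Subtract Modulo): bcdadd with the sign of b inverted */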
2224 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2225 {
2226 ppc_avr_t bcopy = *b;
2227 int sgnb = bcd_get_sgn(b);
2228 if (sgnb < 0) {
2229 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2230 } else if (sgnb > 0) {
2231 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2232 }
2233 /* else invalid ... defer to bcdadd code for proper handling */
2234
2235 return helper_bcdadd(r, a, &bcopy, ps);
2236 }
2237
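/*
 * bcdcfn. (Decimal Convert From National): convert 16-bit '0'..'9'
 * characters plus a trailing sign character to signed packed decimal.
 */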
2238 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2239 {
2240 int i;
2241 int cr = 0;
2242 uint16_t national = 0;
2243 uint16_t sgnb = get_national_digit(b, 0);
2244 ppc_avr_t ret = { .u64 = { 0, 0 } };
2245 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2246
2247 for (i = 1; i < 8; i++) {
2248 national = get_national_digit(b, i);
2249 if (unlikely(national < 0x30 || national > 0x39)) {
2250 invalid = 1;
2251 break;
2252 }
2253
2254 bcd_put_digit(&ret, national & 0xf, i);
2255 }
2256
2257 if (sgnb == NATIONAL_PLUS) {
2258 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2259 } else {
2260 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2261 }
2262
2263 cr = bcd_cmp_zero(&ret);
2264
2265 if (unlikely(invalid)) {
2266 cr = CRF_SO;
2267 }
2268
2269 *r = ret;
2270
2271 return cr;
2272 }
2273
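/*
 * bcdctn. (Decimal Convert To National): the inverse of bcdcfn; SO is set
 * when the source has significant digits beyond the 7 that fit.
 */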
2274 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2275 {
2276 int i;
2277 int cr = 0;
2278 int sgnb = bcd_get_sgn(b);
2279 int invalid = (sgnb == 0);
2280 ppc_avr_t ret = { .u64 = { 0, 0 } };
2281
2282 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2283
2284 for (i = 1; i < 8; i++) {
2285 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2286
2287 if (unlikely(invalid)) {
2288 break;
2289 }
2290 }
2291 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2292
2293 cr = bcd_cmp_zero(b);
2294
2295 if (ox_flag) {
2296 cr |= CRF_SO;
2297 }
2298
2299 if (unlikely(invalid)) {
2300 cr = CRF_SO;
2301 }
2302
2303 *r = ret;
2304
2305 return cr;
2306 }
2307
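/*
 * bcdcfz. (Decimal Convert From Zoned): convert zoned decimal (one byte
 * per digit) to signed packed decimal; ps selects the expected zone value
 * and the way the sign is encoded.
 */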
2308 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2309 {
2310 int i;
2311 int cr = 0;
2312 int invalid = 0;
2313 int zone_digit = 0;
2314 int zone_lead = ps ? 0xF : 0x3;
2315 int digit = 0;
2316 ppc_avr_t ret = { .u64 = { 0, 0 } };
2317 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2318
2319 if (unlikely((sgnb < 0xA) && ps)) {
2320 invalid = 1;
2321 }
2322
2323 for (i = 0; i < 16; i++) {
2324 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2325 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2326 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2327 invalid = 1;
2328 break;
2329 }
2330
2331 bcd_put_digit(&ret, digit, i + 1);
2332 }
2333
2334 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2335 (!ps && (sgnb & 0x4))) {
2336 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2337 } else {
2338 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2339 }
2340
2341 cr = bcd_cmp_zero(&ret);
2342
2343 if (unlikely(invalid)) {
2344 cr = CRF_SO;
2345 }
2346
2347 *r = ret;
2348
2349 return cr;
2350 }
2351
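/*
 * bcdctz. (Decimal Convert To Zoned): the inverse of bcdcfz; SO is set
 * when the source has more than the 16 digits the zoned form can hold.
 */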
2352 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2353 {
2354 int i;
2355 int cr = 0;
2356 uint8_t digit = 0;
2357 int sgnb = bcd_get_sgn(b);
2358 int zone_lead = (ps) ? 0xF0 : 0x30;
2359 int invalid = (sgnb == 0);
2360 ppc_avr_t ret = { .u64 = { 0, 0 } };
2361
2362 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2363
2364 for (i = 0; i < 16; i++) {
2365 digit = bcd_get_digit(b, i + 1, &invalid);
2366
2367 if (unlikely(invalid)) {
2368 break;
2369 }
2370
2371 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2372 }
2373
2374 if (ps) {
2375 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2376 } else {
2377 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2378 }
2379
2380 cr = bcd_cmp_zero(b);
2381
2382 if (ox_flag) {
2383 cr |= CRF_SO;
2384 }
2385
2386 if (unlikely(invalid)) {
2387 cr = CRF_SO;
2388 }
2389
2390 *r = ret;
2391
2392 return cr;
2393 }
2394
2395 /**
2396 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2397 *
2398 * Returns:
2399 * > 0 if ahi|alo > bhi|blo,
2400 * 0 if ahi|alo == bhi|blo,
2401 * < 0 if ahi|alo < bhi|blo
2402 */
2403 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2404 uint64_t blo, uint64_t bhi)
2405 {
2406 return (ahi == bhi) ?
2407 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2408 (ahi > bhi ? 1 : -1);
2409 }
2410
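/*
 * bcdcfsq. (Decimal Convert From Signed Quadword): convert a 128-bit two's
 * complement integer to signed packed decimal; SO is set (and r left
 * untouched) when |src| exceeds 10^31 - 1.
 */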
2411 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2412 {
2413 int i;
2414 int cr;
2415 uint64_t lo_value;
2416 uint64_t hi_value;
2417 uint64_t rem;
2418 ppc_avr_t ret = { .u64 = { 0, 0 } };
2419
2420 if (b->VsrSD(0) < 0) {
2421 lo_value = -b->VsrSD(1);
2422 hi_value = ~b->VsrD(0) + !lo_value;
2423 bcd_put_digit(&ret, 0xD, 0);
2424
2425 cr = CRF_LT;
2426 } else {
2427 lo_value = b->VsrD(1);
2428 hi_value = b->VsrD(0);
2429 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2430
2431 if (hi_value == 0 && lo_value == 0) {
2432 cr = CRF_EQ;
2433 } else {
2434 cr = CRF_GT;
2435 }
2436 }
2437
2438 /*
2439 * Check src limits: abs(src) <= 10^31 - 1
2440 *
2441 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2442 */
2443 if (ucmp128(lo_value, hi_value,
2444 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2445 cr |= CRF_SO;
2446
2447 /*
2448 * According to the ISA, if src wouldn't fit in the destination
2449 * register, the result is undefined.
2450 * In that case, we leave r unchanged.
2451 */
2452 } else {
2453 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2454
2455 for (i = 1; i < 16; rem /= 10, i++) {
2456 bcd_put_digit(&ret, rem % 10, i);
2457 }
2458
2459 for (; i < 32; lo_value /= 10, i++) {
2460 bcd_put_digit(&ret, lo_value % 10, i);
2461 }
2462
2463 *r = ret;
2464 }
2465
2466 return cr;
2467 }
2468
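/*
 * bcdctsq. (Decimal Convert To Signed Quadword): convert signed packed
 * decimal back to a 128-bit two's complement integer.
 */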
2469 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2470 {
2471 uint8_t i;
2472 int cr;
2473 uint64_t carry;
2474 uint64_t unused;
2475 uint64_t lo_value;
2476 uint64_t hi_value = 0;
2477 int sgnb = bcd_get_sgn(b);
2478 int invalid = (sgnb == 0);
2479
2480 lo_value = bcd_get_digit(b, 31, &invalid);
2481 for (i = 30; i > 0; i--) {
2482 mulu64(&lo_value, &carry, lo_value, 10ULL);
2483 mulu64(&hi_value, &unused, hi_value, 10ULL);
2484 lo_value += bcd_get_digit(b, i, &invalid);
2485 hi_value += carry;
2486
2487 if (unlikely(invalid)) {
2488 break;
2489 }
2490 }
2491
2492 if (sgnb == -1) {
2493 r->VsrSD(1) = -lo_value;
2494 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2495 } else {
2496 r->VsrSD(1) = lo_value;
2497 r->VsrSD(0) = hi_value;
2498 }
2499
2500 cr = bcd_cmp_zero(b);
2501
2502 if (unlikely(invalid)) {
2503 cr = CRF_SO;
2504 }
2505
2506 return cr;
2507 }
2508
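/*
 * bcdcpsgn. (Decimal Copy Sign): the digits of a combined with the sign
 * nibble of b.
 */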
2509 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2510 {
2511 int i;
2512 int invalid = 0;
2513
2514 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2515 return CRF_SO;
2516 }
2517
2518 *r = *a;
2519 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2520
2521 for (i = 1; i < 32; i++) {
2522 bcd_get_digit(a, i, &invalid);
2523 bcd_get_digit(b, i, &invalid);
2524 if (unlikely(invalid)) {
2525 return CRF_SO;
2526 }
2527 }
2528
2529 return bcd_cmp_zero(r);
2530 }
2531
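/*
 * bcdsetsgn. (Decimal Set Sign): rewrite the sign nibble of b with the
 * preferred encoding selected by ps.
 */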
2532 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2533 {
2534 int sgnb = bcd_get_sgn(b);
2535
2536 *r = *b;
2537 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2538
2539 if (bcd_is_valid(b) == false) {
2540 return CRF_SO;
2541 }
2542
2543 return bcd_cmp_zero(r);
2544 }
2545
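/*
 * bcds. (Decimal Shift): shift the digits of b left (positive shift count
 * in a) or right (negative count), keeping the sign nibble in place.
 */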
2546 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2547 {
2548 int cr;
2549 int i = a->VsrSB(7);
2550 bool ox_flag = false;
2551 int sgnb = bcd_get_sgn(b);
2552 ppc_avr_t ret = *b;
2553 ret.VsrD(1) &= ~0xf;
2554
2555 if (bcd_is_valid(b) == false) {
2556 return CRF_SO;
2557 }
2558
2559 if (unlikely(i > 31)) {
2560 i = 31;
2561 } else if (unlikely(i < -31)) {
2562 i = -31;
2563 }
2564
2565 if (i > 0) {
2566 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2567 } else {
2568 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2569 }
2570 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2571
2572 *r = ret;
2573
2574 cr = bcd_cmp_zero(r);
2575 if (ox_flag) {
2576 cr |= CRF_SO;
2577 }
2578
2579 return cr;
2580 }
2581
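/*
 * bcdus. (Decimal Unsigned Shift): like bcds. but all 32 nibbles are
 * treated as digits, so there is no sign nibble to preserve.
 */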
2582 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2583 {
2584 int cr;
2585 int i;
2586 int invalid = 0;
2587 bool ox_flag = false;
2588 ppc_avr_t ret = *b;
2589
2590 for (i = 0; i < 32; i++) {
2591 bcd_get_digit(b, i, &invalid);
2592
2593 if (unlikely(invalid)) {
2594 return CRF_SO;
2595 }
2596 }
2597
2598 i = a->VsrSB(7);
2599 if (i >= 32) {
2600 ox_flag = true;
2601 ret.VsrD(1) = ret.VsrD(0) = 0;
2602 } else if (i <= -32) {
2603 ret.VsrD(1) = ret.VsrD(0) = 0;
2604 } else if (i > 0) {
2605 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2606 } else {
2607 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2608 }
2609 *r = ret;
2610
2611 cr = bcd_cmp_zero(r);
2612 if (ox_flag) {
2613 cr |= CRF_SO;
2614 }
2615
2616 return cr;
2617 }
2618
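/*
 * bcdsr. (Decimal Shift and Round): like bcds., except that a right shift
 * rounds the result up when the most significant discarded digit is 5 or
 * greater.
 */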
2619 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2620 {
2621 int cr;
2622 int unused = 0;
2623 int invalid = 0;
2624 bool ox_flag = false;
2625 int sgnb = bcd_get_sgn(b);
2626 ppc_avr_t ret = *b;
2627 ret.VsrD(1) &= ~0xf;
2628
2629 int i = a->VsrSB(7);
2630 ppc_avr_t bcd_one;
2631
2632 bcd_one.VsrD(0) = 0;
2633 bcd_one.VsrD(1) = 0x10;
2634
2635 if (bcd_is_valid(b) == false) {
2636 return CRF_SO;
2637 }
2638
2639 if (unlikely(i > 31)) {
2640 i = 31;
2641 } else if (unlikely(i < -31)) {
2642 i = -31;
2643 }
2644
2645 if (i > 0) {
2646 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2647 } else {
2648 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2649
2650 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2651 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2652 }
2653 }
2654 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2655
2656 cr = bcd_cmp_zero(&ret);
2657 if (ox_flag) {
2658 cr |= CRF_SO;
2659 }
2660 *r = ret;
2661
2662 return cr;
2663 }
2664
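/*
 * bcdtrunc. (Decimal Truncate): keep only the low N digits of b, N being
 * the count supplied in a; SO reports that non-zero digits were dropped.
 */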
2665 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2666 {
2667 uint64_t mask;
2668 uint32_t ox_flag = 0;
2669 int i = a->VsrSH(3) + 1;
2670 ppc_avr_t ret = *b;
2671
2672 if (bcd_is_valid(b) == false) {
2673 return CRF_SO;
2674 }
2675
2676 if (i > 16 && i < 32) {
2677 mask = (uint64_t)-1 >> (128 - i * 4);
2678 if (ret.VsrD(0) & ~mask) {
2679 ox_flag = CRF_SO;
2680 }
2681
2682 ret.VsrD(0) &= mask;
2683 } else if (i >= 0 && i <= 16) {
2684 mask = (uint64_t)-1 >> (64 - i * 4);
2685 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2686 ox_flag = CRF_SO;
2687 }
2688
2689 ret.VsrD(1) &= mask;
2690 ret.VsrD(0) = 0;
2691 }
2692 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2693 *r = ret;
2694
2695 return bcd_cmp_zero(&ret) | ox_flag;
2696 }
2697
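/* bcdutrunc. (Decimal Unsigned Truncate): bcdtrunc for sign-less operands */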
2698 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2699 {
2700 int i;
2701 uint64_t mask;
2702 uint32_t ox_flag = 0;
2703 int invalid = 0;
2704 ppc_avr_t ret = *b;
2705
2706 for (i = 0; i < 32; i++) {
2707 bcd_get_digit(b, i, &invalid);
2708
2709 if (unlikely(invalid)) {
2710 return CRF_SO;
2711 }
2712 }
2713
2714 i = a->VsrSH(3);
2715 if (i > 16 && i < 33) {
2716 mask = (uint64_t)-1 >> (128 - i * 4);
2717 if (ret.VsrD(0) & ~mask) {
2718 ox_flag = CRF_SO;
2719 }
2720
2721 ret.VsrD(0) &= mask;
2722 } else if (i > 0 && i <= 16) {
2723 mask = (uint64_t)-1 >> (64 - i * 4);
2724 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2725 ox_flag = CRF_SO;
2726 }
2727
2728 ret.VsrD(1) &= mask;
2729 ret.VsrD(0) = 0;
2730 } else if (i == 0) {
2731 if (ret.VsrD(0) || ret.VsrD(1)) {
2732 ox_flag = CRF_SO;
2733 }
2734 ret.VsrD(0) = ret.VsrD(1) = 0;
2735 }
2736
2737 *r = ret;
2738 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2739 return ox_flag | CRF_EQ;
2740 }
2741
2742 return ox_flag | CRF_GT;
2743 }
2744
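/* vsbox: apply the AES SubBytes S-box to every byte of a */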
2745 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2746 {
2747 int i;
2748 VECTOR_FOR_INORDER_I(i, u8) {
2749 r->u8[i] = AES_sbox[a->u8[i]];
2750 }
2751 }
2752
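/*
 * vcipher: one full AES encryption round (SubBytes, ShiftRows, MixColumns)
 * followed by AddRoundKey with b, using the combined AES_Te lookup tables.
 */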
2753 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2754 {
2755 ppc_avr_t result;
2756 int i;
2757
2758 VECTOR_FOR_INORDER_I(i, u32) {
2759 result.VsrW(i) = b->VsrW(i) ^
2760 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2761 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2762 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2763 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2764 }
2765 *r = result;
2766 }
2767
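/* vcipherlast: the final AES encryption round, which omits MixColumns */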
2768 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2769 {
2770 ppc_avr_t result;
2771 int i;
2772
2773 VECTOR_FOR_INORDER_I(i, u8) {
2774 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2775 }
2776 *r = result;
2777 }
2778
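/*
 * vncipher: one AES decryption round built from the inverse S-box and the
 * AES_imc InvMixColumns tables.
 */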
2779 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2780 {
2781 /*
 * This differs from what is written in ISA V2.07: the RTL there is
 * incorrect and will be fixed in V2.07B.
 */
2783 int i;
2784 ppc_avr_t tmp;
2785
2786 VECTOR_FOR_INORDER_I(i, u8) {
2787 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2788 }
2789
2790 VECTOR_FOR_INORDER_I(i, u32) {
2791 r->VsrW(i) =
2792 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2793 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2794 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2795 AES_imc[tmp.VsrB(4 * i + 3)][3];
2796 }
2797 }
2798
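/* vncipherlast: the final AES decryption round, which omits InvMixColumns */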
2799 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2800 {
2801 ppc_avr_t result;
2802 int i;
2803
2804 VECTOR_FOR_INORDER_I(i, u8) {
2805 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2806 }
2807 *r = result;
2808 }
2809
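/*
 * vshasigmaw: the SHA-256 sigma functions; st selects the lower-case
 * (message schedule) or upper-case (compression) pair, and each bit of six
 * picks sigma0 or sigma1 for the corresponding word.
 */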
2810 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2811 {
2812 int st = (st_six & 0x10) != 0;
2813 int six = st_six & 0xF;
2814 int i;
2815
2816 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2817 if (st == 0) {
2818 if ((six & (0x8 >> i)) == 0) {
2819 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2820 ror32(a->VsrW(i), 18) ^
2821 (a->VsrW(i) >> 3);
2822 } else { /* six.bit[i] == 1 */
2823 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2824 ror32(a->VsrW(i), 19) ^
2825 (a->VsrW(i) >> 10);
2826 }
2827 } else { /* st == 1 */
2828 if ((six & (0x8 >> i)) == 0) {
2829 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2830 ror32(a->VsrW(i), 13) ^
2831 ror32(a->VsrW(i), 22);
2832 } else { /* six.bit[i] == 1 */
2833 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2834 ror32(a->VsrW(i), 11) ^
2835 ror32(a->VsrW(i), 25);
2836 }
2837 }
2838 }
2839 }
2840
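/* vshasigmad: as above, but the SHA-512 sigma functions on each doubleword */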
2841 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2842 {
2843 int st = (st_six & 0x10) != 0;
2844 int six = st_six & 0xF;
2845 int i;
2846
2847 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2848 if (st == 0) {
2849 if ((six & (0x8 >> (2 * i))) == 0) {
2850 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2851 ror64(a->VsrD(i), 8) ^
2852 (a->VsrD(i) >> 7);
2853 } else { /* six.bit[2*i] == 1 */
2854 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2855 ror64(a->VsrD(i), 61) ^
2856 (a->VsrD(i) >> 6);
2857 }
2858 } else { /* st == 1 */
2859 if ((six & (0x8 >> (2 * i))) == 0) {
2860 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2861 ror64(a->VsrD(i), 34) ^
2862 ror64(a->VsrD(i), 39);
2863 } else { /* six.bit[2*i] == 1 */
2864 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2865 ror64(a->VsrD(i), 18) ^
2866 ror64(a->VsrD(i), 41);
2867 }
2868 }
2869 }
2870 }
2871
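/*
 * vpermxor: for each byte, the two nibbles of c select one byte of a and
 * one byte of b, and the two selected bytes are XORed together.
 */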
2872 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2873 {
2874 ppc_avr_t result;
2875 int i;
2876
2877 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2878 int indexA = c->VsrB(i) >> 4;
2879 int indexB = c->VsrB(i) & 0xF;
2880
2881 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2882 }
2883 *r = result;
2884 }
2885
2886 #undef VECTOR_FOR_INORDER_I
2887
2888 /*****************************************************************************/
2889 /* SPE extension helpers */
2890 /* Table of bit-reversed 4-bit values, used to speed up bit reversal */
2891 static const uint8_t hbrev[16] = {
2892 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2893 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2894 };
2895
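/* Reverse the bit order within a byte, e.g. byte_reverse(0xB1) == 0x8D */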
2896 static inline uint8_t byte_reverse(uint8_t val)
2897 {
2898 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2899 }
2900
2901 static inline uint32_t word_reverse(uint32_t val)
2902 {
2903 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2904 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2905 }
2906
2907 #define MASKBITS 16 /* Arbitrary value - implementation dependent, still to be fixed */
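/*
 * brinc: bit-reversed increment of the low MASKBITS address bits of arg1,
 * under the mask in arg2, as used for FFT-style bit-reversed addressing.
 */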
2908 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2909 {
2910 uint32_t a, b, d, mask;
2911
2912 mask = UINT32_MAX >> (32 - MASKBITS);
2913 a = arg1 & mask;
2914 b = arg2 & mask;
2915 d = word_reverse(1 + word_reverse(a | ~b));
2916 return (arg1 & ~mask) | (d & b);
2917 }
2918
2919 uint32_t helper_cntlsw32(uint32_t val)
2920 {
2921 if (val & 0x80000000) {
2922 return clz32(~val);
2923 } else {
2924 return clz32(val);
2925 }
2926 }
2927
2928 uint32_t helper_cntlzw32(uint32_t val)
2929 {
2930 return clz32(val);
2931 }
2932
2933 /* 440 specific */
2934 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2935 target_ulong low, uint32_t update_Rc)
2936 {
2937 target_ulong mask;
2938 int i;
2939
2940 i = 1;
2941 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2942 if ((high & mask) == 0) {
2943 if (update_Rc) {
2944 env->crf[0] = 0x4;
2945 }
2946 goto done;
2947 }
2948 i++;
2949 }
2950 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2951 if ((low & mask) == 0) {
2952 if (update_Rc) {
2953 env->crf[0] = 0x8;
2954 }
2955 goto done;
2956 }
2957 i++;
2958 }
2959 i = 8;
2960 if (update_Rc) {
2961 env->crf[0] = 0x2;
2962 }
2963 done:
2964 env->xer = (env->xer & ~0x7F) | i;
2965 if (update_Rc) {
2966 env->crf[0] |= xer_so;
2967 }
2968 return i;
2969 }