git.proxmox.com Git - mirror_qemu.git / blob - target/ppc/int_helper.c
target/ppc: Implement vpdepd/vpextd instruction
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
35
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 {
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
42 }
43 }
44
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
47 {
48 uint64_t rt = 0;
49 int overflow = 0;
50
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
53
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
59 }
60
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
63 }
64
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
67 }
68
69 return (target_ulong)rt;
70 }
71
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
74 {
75 int64_t rt = 0;
76 int overflow = 0;
77
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
80
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
87 }
88
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
91 }
92
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
95 }
96
97 return (target_ulong)rt;
98 }
99
100 #if defined(TARGET_PPC64)
101
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 {
104 uint64_t rt = 0;
105 int overflow = 0;
106
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
112 }
113
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
116 }
117
118 return rt;
119 }
120
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 {
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
127
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
133 }
134
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
137 }
138
139 return rt;
140 }
141
142 #endif
143
144
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
148
149 /*
150  * subtract 1 from each byte, AND with the inverse, and check whether the
151  * MSB is set in each byte.
152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
154 */
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
156
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
159
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
161 {
162 return hasvalue(rb, ra) ? CRF_GT : 0;
163 }
164
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
168
169 /*
170 * Return a random number.
171 */
172 uint64_t helper_darn32(void)
173 {
174 Error *err = NULL;
175 uint32_t ret;
176
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
182 }
183
184 return ret;
185 }
186
187 uint64_t helper_darn64(void)
188 {
189 Error *err = NULL;
190 uint64_t ret;
191
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
197 }
198
199 return ret;
200 }
201
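/*
 * bpermd: each of the eight bytes of RS selects a bit of RB (big-endian
 * bit numbering, as per PPC_BIT); the selected bits are gathered into
 * the low-order byte of the result.  Selector values >= 64 yield 0.
 */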
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
203 {
204 int i;
205 uint64_t ra = 0;
206
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
212 }
213 }
214 }
215 return ra;
216 }
217
218 #endif
219
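/*
 * cmpb: for each byte where rs and rb are equal, the corresponding byte
 * of the result is set to 0xff; all other result bytes are 0x00.
 */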
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
221 {
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
225
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
229 }
230 mask <<= 8;
231 }
232 return ra;
233 }
234
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
238 {
239 int32_t ret;
240
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
249 }
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
253 }
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
257 }
258 return (target_long)ret;
259 }
260
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
264 {
265 int64_t ret;
266
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
275 }
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
279 }
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
283 }
284 return ret;
285 }
286 #endif
287
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
299 }
300
301 target_ulong helper_popcntw(target_ulong val)
302 {
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
315 }
316 #else
317 target_ulong helper_popcntb(target_ulong val)
318 {
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
324 }
325 #endif
326
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
328 {
329 /*
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
332  * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
333  * between ctz and cto, we negate the mask at the end of each iteration.
334 */
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
338
339 if (mask == 0 || mask == -1) {
340 return src;
341 }
342
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
349 }
350
351 /*
352  * Extracts 'n' trailing bits of src and puts them on the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
355 */
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
361 }
362
363 /*
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
368 */
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
374 }
375
376 /*
377 * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
378  * we'll shift it a further 64 - ctpop(mask) times.
379 */
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
384 }
385
386 return left | (right >> n);
387 }
388
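/*
 * PDEPD (parallel bit deposit): the low-order bits of src are scattered,
 * in order, into the bit positions selected by the set bits of mask,
 * starting from the least significant set bit.
 */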
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
390 {
391 int i, o;
392 uint64_t result = 0;
393
394 if (mask == -1) {
395 return src;
396 }
397
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
402 }
403
404 return result;
405 }
406
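/*
 * PEXTD (parallel bit extract): the bits of src selected by the set bits
 * of mask are packed, in order, into the low-order bits of the result.
 */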
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
408 {
409 int i, o;
410 uint64_t result = 0;
411
412 if (mask == -1) {
413 return src;
414 }
415
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
420 }
421
422 return result;
423 }
424
425 /*****************************************************************************/
426 /* PowerPC 601 specific instructions (POWER bridge) */
427 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
428 {
429 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
430
431 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
432 (int32_t)arg2 == 0) {
433 env->spr[SPR_MQ] = 0;
434 return INT32_MIN;
435 } else {
436 env->spr[SPR_MQ] = tmp % arg2;
437 return tmp / (int32_t)arg2;
438 }
439 }
440
441 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
442 target_ulong arg2)
443 {
444 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
445
446 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
447 (int32_t)arg2 == 0) {
448 env->so = env->ov = 1;
449 env->spr[SPR_MQ] = 0;
450 return INT32_MIN;
451 } else {
452 env->spr[SPR_MQ] = tmp % arg2;
453 tmp /= (int32_t)arg2;
454 if ((int32_t)tmp != tmp) {
455 env->so = env->ov = 1;
456 } else {
457 env->ov = 0;
458 }
459 return tmp;
460 }
461 }
462
463 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
464 target_ulong arg2)
465 {
466 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
467 (int32_t)arg2 == 0) {
468 env->spr[SPR_MQ] = 0;
469 return INT32_MIN;
470 } else {
471 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
472 return (int32_t)arg1 / (int32_t)arg2;
473 }
474 }
475
476 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
477 target_ulong arg2)
478 {
479 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
480 (int32_t)arg2 == 0) {
481 env->so = env->ov = 1;
482 env->spr[SPR_MQ] = 0;
483 return INT32_MIN;
484 } else {
485 env->ov = 0;
486 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
487 return (int32_t)arg1 / (int32_t)arg2;
488 }
489 }
490
491 /*****************************************************************************/
492 /* 602 specific instructions */
493 /* mfrom is the most crazy instruction ever seen, imho ! */
494 /* Real implementation uses a ROM table. Do the same */
495 /*
496 * Extremely decomposed:
497  *   return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
499 */
500 #if !defined(CONFIG_USER_ONLY)
501 target_ulong helper_602_mfrom(target_ulong arg)
502 {
503 if (likely(arg < 602)) {
504 #include "mfrom_table.c.inc"
505 return mfrom_ROM_table[arg];
506 } else {
507 return 0;
508 }
509 }
510 #endif
511
512 /*****************************************************************************/
513 /* Altivec extension helpers */
514 #if defined(HOST_WORDS_BIGENDIAN)
515 #define VECTOR_FOR_INORDER_I(index, element) \
516 for (index = 0; index < ARRAY_SIZE(r->element); index++)
517 #else
518 #define VECTOR_FOR_INORDER_I(index, element) \
519 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
520 #endif
521
522 /* Saturating arithmetic helpers. */
523 #define SATCVT(from, to, from_type, to_type, min, max) \
524 static inline to_type cvt##from##to(from_type x, int *sat) \
525 { \
526 to_type r; \
527 \
528 if (x < (from_type)min) { \
529 r = min; \
530 *sat = 1; \
531 } else if (x > (from_type)max) { \
532 r = max; \
533 *sat = 1; \
534 } else { \
535 r = x; \
536 } \
537 return r; \
538 }
539 #define SATCVTU(from, to, from_type, to_type, min, max) \
540 static inline to_type cvt##from##to(from_type x, int *sat) \
541 { \
542 to_type r; \
543 \
544 if (x > (from_type)max) { \
545 r = max; \
546 *sat = 1; \
547 } else { \
548 r = x; \
549 } \
550 return r; \
551 }
552 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
553 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
554 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
555
556 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
557 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
558 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
559 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
560 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
561 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
562 #undef SATCVT
563 #undef SATCVTU
564
565 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
566 {
567 ppc_store_vscr(env, vscr);
568 }
569
570 uint32_t helper_mfvscr(CPUPPCState *env)
571 {
572 return ppc_get_vscr(env);
573 }
574
575 static inline void set_vscr_sat(CPUPPCState *env)
576 {
577 /* The choice of non-zero value is arbitrary. */
578 env->vscr_sat.u32[0] = 1;
579 }
580
581 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
582 {
583 int i;
584
585 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
586 r->u32[i] = ~a->u32[i] < b->u32[i];
587 }
588 }
589
590 /* vprtybw */
591 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
592 {
593 int i;
594 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
595 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
596 res ^= res >> 8;
597 r->u32[i] = res & 1;
598 }
599 }
600
601 /* vprtybd */
602 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
603 {
604 int i;
605 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
606 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
607 res ^= res >> 16;
608 res ^= res >> 8;
609 r->u64[i] = res & 1;
610 }
611 }
612
613 /* vprtybq */
614 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
615 {
616 uint64_t res = b->u64[0] ^ b->u64[1];
617 res ^= res >> 32;
618 res ^= res >> 16;
619 res ^= res >> 8;
620 r->VsrD(1) = res & 1;
621 r->VsrD(0) = 0;
622 }
623
624 #define VARITHFP(suffix, func) \
625 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
626 ppc_avr_t *b) \
627 { \
628 int i; \
629 \
630 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
631 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
632 } \
633 }
634 VARITHFP(addfp, float32_add)
635 VARITHFP(subfp, float32_sub)
636 VARITHFP(minfp, float32_min)
637 VARITHFP(maxfp, float32_max)
638 #undef VARITHFP
639
640 #define VARITHFPFMA(suffix, type) \
641 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
642 ppc_avr_t *b, ppc_avr_t *c) \
643 { \
644 int i; \
645 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
646 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
647 type, &env->vec_status); \
648 } \
649 }
650 VARITHFPFMA(maddfp, 0);
651 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
652 #undef VARITHFPFMA
653
654 #define VARITHSAT_CASE(type, op, cvt, element) \
655 { \
656 type result = (type)a->element[i] op (type)b->element[i]; \
657 r->element[i] = cvt(result, &sat); \
658 }
659
660 #define VARITHSAT_DO(name, op, optype, cvt, element) \
661 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
662 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
663 { \
664 int sat = 0; \
665 int i; \
666 \
667 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
668 VARITHSAT_CASE(optype, op, cvt, element); \
669 } \
670 if (sat) { \
671 vscr_sat->u32[0] = 1; \
672 } \
673 }
674 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
675 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
676 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
677 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
678 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
679 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
680 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
681 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
682 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
683 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
684 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
685 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
686 #undef VARITHSAT_CASE
687 #undef VARITHSAT_DO
688 #undef VARITHSAT_SIGNED
689 #undef VARITHSAT_UNSIGNED
690
691 #define VAVG_DO(name, element, etype) \
692 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
693 { \
694 int i; \
695 \
696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
697 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
698 r->element[i] = x >> 1; \
699 } \
700 }
701
702 #define VAVG(type, signed_element, signed_type, unsigned_element, \
703 unsigned_type) \
704 VAVG_DO(avgs##type, signed_element, signed_type) \
705 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
706 VAVG(b, s8, int16_t, u8, uint16_t)
707 VAVG(h, s16, int32_t, u16, uint32_t)
708 VAVG(w, s32, int64_t, u32, uint64_t)
709 #undef VAVG_DO
710 #undef VAVG
711
712 #define VABSDU_DO(name, element) \
713 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
714 { \
715 int i; \
716 \
717 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
718 r->element[i] = (a->element[i] > b->element[i]) ? \
719 (a->element[i] - b->element[i]) : \
720 (b->element[i] - a->element[i]); \
721 } \
722 }
723
724 /*
725 * VABSDU - Vector absolute difference unsigned
726  * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
727 * element - element type to access from vector
728 */
729 #define VABSDU(type, element) \
730 VABSDU_DO(absdu##type, element)
731 VABSDU(b, u8)
732 VABSDU(h, u16)
733 VABSDU(w, u32)
734 #undef VABSDU_DO
735 #undef VABSDU
736
737 #define VCF(suffix, cvt, element) \
738 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
739 ppc_avr_t *b, uint32_t uim) \
740 { \
741 int i; \
742 \
743 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
744 float32 t = cvt(b->element[i], &env->vec_status); \
745 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
746 } \
747 }
748 VCF(ux, uint32_to_float32, u32)
749 VCF(sx, int32_to_float32, s32)
750 #undef VCF
751
752 #define VCMP_DO(suffix, compare, element, record) \
753 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
754 ppc_avr_t *a, ppc_avr_t *b) \
755 { \
756 uint64_t ones = (uint64_t)-1; \
757 uint64_t all = ones; \
758 uint64_t none = 0; \
759 int i; \
760 \
761 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
762 uint64_t result = (a->element[i] compare b->element[i] ? \
763 ones : 0x0); \
764 switch (sizeof(a->element[0])) { \
765 case 8: \
766 r->u64[i] = result; \
767 break; \
768 case 4: \
769 r->u32[i] = result; \
770 break; \
771 case 2: \
772 r->u16[i] = result; \
773 break; \
774 case 1: \
775 r->u8[i] = result; \
776 break; \
777 } \
778 all &= result; \
779 none |= result; \
780 } \
781 if (record) { \
782 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
783 } \
784 }
785 #define VCMP(suffix, compare, element) \
786 VCMP_DO(suffix, compare, element, 0) \
787 VCMP_DO(suffix##_dot, compare, element, 1)
788 VCMP(equb, ==, u8)
789 VCMP(equh, ==, u16)
790 VCMP(equw, ==, u32)
791 VCMP(equd, ==, u64)
792 VCMP(gtub, >, u8)
793 VCMP(gtuh, >, u16)
794 VCMP(gtuw, >, u32)
795 VCMP(gtud, >, u64)
796 VCMP(gtsb, >, s8)
797 VCMP(gtsh, >, s16)
798 VCMP(gtsw, >, s32)
799 VCMP(gtsd, >, s64)
800 #undef VCMP_DO
801 #undef VCMP
802
803 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
804 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
805 ppc_avr_t *a, ppc_avr_t *b) \
806 { \
807 etype ones = (etype)-1; \
808 etype all = ones; \
809 etype result, none = 0; \
810 int i; \
811 \
812 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
813 if (cmpzero) { \
814 result = ((a->element[i] == 0) \
815 || (b->element[i] == 0) \
816 || (a->element[i] != b->element[i]) ? \
817 ones : 0x0); \
818 } else { \
819 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
820 } \
821 r->element[i] = result; \
822 all &= result; \
823 none |= result; \
824 } \
825 if (record) { \
826 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
827 } \
828 }
829
830 /*
831 * VCMPNEZ - Vector compare not equal to zero
832 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
833 * element - element type to access from vector
834 */
835 #define VCMPNE(suffix, element, etype, cmpzero) \
836 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
837 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
838 VCMPNE(zb, u8, uint8_t, 1)
839 VCMPNE(zh, u16, uint16_t, 1)
840 VCMPNE(zw, u32, uint32_t, 1)
841 VCMPNE(b, u8, uint8_t, 0)
842 VCMPNE(h, u16, uint16_t, 0)
843 VCMPNE(w, u32, uint32_t, 0)
844 #undef VCMPNE_DO
845 #undef VCMPNE
846
847 #define VCMPFP_DO(suffix, compare, order, record) \
848 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
849 ppc_avr_t *a, ppc_avr_t *b) \
850 { \
851 uint32_t ones = (uint32_t)-1; \
852 uint32_t all = ones; \
853 uint32_t none = 0; \
854 int i; \
855 \
856 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
857 uint32_t result; \
858 FloatRelation rel = \
859 float32_compare_quiet(a->f32[i], b->f32[i], \
860 &env->vec_status); \
861 if (rel == float_relation_unordered) { \
862 result = 0; \
863 } else if (rel compare order) { \
864 result = ones; \
865 } else { \
866 result = 0; \
867 } \
868 r->u32[i] = result; \
869 all &= result; \
870 none |= result; \
871 } \
872 if (record) { \
873 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
874 } \
875 }
876 #define VCMPFP(suffix, compare, order) \
877 VCMPFP_DO(suffix, compare, order, 0) \
878 VCMPFP_DO(suffix##_dot, compare, order, 1)
879 VCMPFP(eqfp, ==, float_relation_equal)
880 VCMPFP(gefp, !=, float_relation_less)
881 VCMPFP(gtfp, ==, float_relation_greater)
882 #undef VCMPFP_DO
883 #undef VCMPFP
884
885 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
886 ppc_avr_t *a, ppc_avr_t *b, int record)
887 {
888 int i;
889 int all_in = 0;
890
891 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
892 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
893 &env->vec_status);
894 if (le_rel == float_relation_unordered) {
895 r->u32[i] = 0xc0000000;
896 all_in = 1;
897 } else {
898 float32 bneg = float32_chs(b->f32[i]);
899 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
900 &env->vec_status);
901 int le = le_rel != float_relation_greater;
902 int ge = ge_rel != float_relation_less;
903
904 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
905 all_in |= (!le | !ge);
906 }
907 }
908 if (record) {
909 env->crf[6] = (all_in == 0) << 1;
910 }
911 }
912
913 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
914 {
915 vcmpbfp_internal(env, r, a, b, 0);
916 }
917
918 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
919 ppc_avr_t *b)
920 {
921 vcmpbfp_internal(env, r, a, b, 1);
922 }
923
924 #define VCT(suffix, satcvt, element) \
925 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
926 ppc_avr_t *b, uint32_t uim) \
927 { \
928 int i; \
929 int sat = 0; \
930 float_status s = env->vec_status; \
931 \
932 set_float_rounding_mode(float_round_to_zero, &s); \
933 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
934 if (float32_is_any_nan(b->f32[i])) { \
935 r->element[i] = 0; \
936 } else { \
937 float64 t = float32_to_float64(b->f32[i], &s); \
938 int64_t j; \
939 \
940 t = float64_scalbn(t, uim, &s); \
941 j = float64_to_int64(t, &s); \
942 r->element[i] = satcvt(j, &sat); \
943 } \
944 } \
945 if (sat) { \
946 set_vscr_sat(env); \
947 } \
948 }
949 VCT(uxs, cvtsduw, u32)
950 VCT(sxs, cvtsdsw, s32)
951 #undef VCT
952
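/*
 * vclzlsbb: count the leading bytes (starting at byte element 0) whose
 * least-significant bit is zero.
 */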
953 target_ulong helper_vclzlsbb(ppc_avr_t *r)
954 {
955 target_ulong count = 0;
956 int i;
957 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
958 if (r->VsrB(i) & 0x01) {
959 break;
960 }
961 count++;
962 }
963 return count;
964 }
965
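/*
 * vctzlsbb: count the trailing bytes (starting at byte element 15) whose
 * least-significant bit is zero.
 */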
966 target_ulong helper_vctzlsbb(ppc_avr_t *r)
967 {
968 target_ulong count = 0;
969 int i;
970 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
971 if (r->VsrB(i) & 0x01) {
972 break;
973 }
974 count++;
975 }
976 return count;
977 }
978
979 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
980 ppc_avr_t *b, ppc_avr_t *c)
981 {
982 int sat = 0;
983 int i;
984
985 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
986 int32_t prod = a->s16[i] * b->s16[i];
987 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
988
989 r->s16[i] = cvtswsh(t, &sat);
990 }
991
992 if (sat) {
993 set_vscr_sat(env);
994 }
995 }
996
997 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
998 ppc_avr_t *b, ppc_avr_t *c)
999 {
1000 int sat = 0;
1001 int i;
1002
1003 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1004 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
1005 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
1006 r->s16[i] = cvtswsh(t, &sat);
1007 }
1008
1009 if (sat) {
1010 set_vscr_sat(env);
1011 }
1012 }
1013
1014 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1015 {
1016 int i;
1017
1018 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1019 int32_t prod = a->s16[i] * b->s16[i];
1020 r->s16[i] = (int16_t) (prod + c->s16[i]);
1021 }
1022 }
1023
1024 #define VMRG_DO(name, element, access, ofs) \
1025 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1026 { \
1027 ppc_avr_t result; \
1028 int i, half = ARRAY_SIZE(r->element) / 2; \
1029 \
1030 for (i = 0; i < half; i++) { \
1031 result.access(i * 2 + 0) = a->access(i + ofs); \
1032 result.access(i * 2 + 1) = b->access(i + ofs); \
1033 } \
1034 *r = result; \
1035 }
1036
1037 #define VMRG(suffix, element, access) \
1038 VMRG_DO(mrgl##suffix, element, access, half) \
1039 VMRG_DO(mrgh##suffix, element, access, 0)
1040 VMRG(b, u8, VsrB)
1041 VMRG(h, u16, VsrH)
1042 VMRG(w, u32, VsrW)
1043 #undef VMRG_DO
1044 #undef VMRG
1045
1046 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1047 ppc_avr_t *b, ppc_avr_t *c)
1048 {
1049 int32_t prod[16];
1050 int i;
1051
1052 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1053 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1054 }
1055
1056 VECTOR_FOR_INORDER_I(i, s32) {
1057 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1058 prod[4 * i + 2] + prod[4 * i + 3];
1059 }
1060 }
1061
1062 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1063 ppc_avr_t *b, ppc_avr_t *c)
1064 {
1065 int32_t prod[8];
1066 int i;
1067
1068 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1069 prod[i] = a->s16[i] * b->s16[i];
1070 }
1071
1072 VECTOR_FOR_INORDER_I(i, s32) {
1073 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1074 }
1075 }
1076
1077 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1078 ppc_avr_t *b, ppc_avr_t *c)
1079 {
1080 int32_t prod[8];
1081 int i;
1082 int sat = 0;
1083
1084 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1085 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1086 }
1087
1088 VECTOR_FOR_INORDER_I(i, s32) {
1089 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1090
1091 r->u32[i] = cvtsdsw(t, &sat);
1092 }
1093
1094 if (sat) {
1095 set_vscr_sat(env);
1096 }
1097 }
1098
1099 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1100 ppc_avr_t *b, ppc_avr_t *c)
1101 {
1102 uint16_t prod[16];
1103 int i;
1104
1105 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1106 prod[i] = a->u8[i] * b->u8[i];
1107 }
1108
1109 VECTOR_FOR_INORDER_I(i, u32) {
1110 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1111 prod[4 * i + 2] + prod[4 * i + 3];
1112 }
1113 }
1114
1115 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1116 ppc_avr_t *b, ppc_avr_t *c)
1117 {
1118 uint32_t prod[8];
1119 int i;
1120
1121 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1122 prod[i] = a->u16[i] * b->u16[i];
1123 }
1124
1125 VECTOR_FOR_INORDER_I(i, u32) {
1126 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1127 }
1128 }
1129
1130 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1131 ppc_avr_t *b, ppc_avr_t *c)
1132 {
1133 uint32_t prod[8];
1134 int i;
1135 int sat = 0;
1136
1137 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1138 prod[i] = a->u16[i] * b->u16[i];
1139 }
1140
1141 VECTOR_FOR_INORDER_I(i, s32) {
1142 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1143
1144 r->u32[i] = cvtuduw(t, &sat);
1145 }
1146
1147 if (sat) {
1148 set_vscr_sat(env);
1149 }
1150 }
1151
1152 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1153 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1154 { \
1155 int i; \
1156 \
1157 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1158 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1159 (cast)b->mul_access(i); \
1160 } \
1161 }
1162
1163 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1164 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1165 { \
1166 int i; \
1167 \
1168 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1169 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1170 (cast)b->mul_access(i + 1); \
1171 } \
1172 }
1173
1174 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1175 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1176 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1177 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1178 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1179 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1180 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1181 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1182 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1183 #undef VMUL_DO_EVN
1184 #undef VMUL_DO_ODD
1185 #undef VMUL
1186
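/*
 * The vmulh* helpers below return the high half of the element-wise
 * product: the upper 32 bits of each 32 x 32-bit product for
 * vmulhsw/vmulhuw, and the upper 64 bits of each 64 x 64-bit product
 * for vmulhsd/vmulhud.
 */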
1187 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1188 {
1189 int i;
1190
1191 for (i = 0; i < 4; i++) {
1192 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
1193 }
1194 }
1195
1196 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1197 {
1198 int i;
1199
1200 for (i = 0; i < 4; i++) {
1201 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
1202 (uint64_t)b->u32[i]) >> 32);
1203 }
1204 }
1205
1206 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1207 {
1208 uint64_t discard;
1209
1210 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
1211 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
1212 }
1213
1214 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1215 {
1216 uint64_t discard;
1217
1218 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
1219 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
1220 }
1221
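/*
 * vperm: the low 5 bits of each byte of c select one byte from the
 * 32-byte concatenation of a (selectors 0-15) and b (selectors 16-31).
 */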
1222 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1223 ppc_avr_t *c)
1224 {
1225 ppc_avr_t result;
1226 int i;
1227
1228 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1229 int s = c->VsrB(i) & 0x1f;
1230 int index = s & 0xf;
1231
1232 if (s & 0x10) {
1233 result.VsrB(i) = b->VsrB(index);
1234 } else {
1235 result.VsrB(i) = a->VsrB(index);
1236 }
1237 }
1238 *r = result;
1239 }
1240
1241 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1242 ppc_avr_t *c)
1243 {
1244 ppc_avr_t result;
1245 int i;
1246
1247 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1248 int s = c->VsrB(i) & 0x1f;
1249 int index = 15 - (s & 0xf);
1250
1251 if (s & 0x10) {
1252 result.VsrB(i) = a->VsrB(index);
1253 } else {
1254 result.VsrB(i) = b->VsrB(index);
1255 }
1256 }
1257 *r = result;
1258 }
1259
1260 #if defined(HOST_WORDS_BIGENDIAN)
1261 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1262 #define VBPERMD_INDEX(i) (i)
1263 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1264 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1265 #else
1266 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1267 #define VBPERMD_INDEX(i) (1 - i)
1268 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1269 #define EXTRACT_BIT(avr, i, index) \
1270 (extract64((avr)->u64[1 - i], 63 - index, 1))
1271 #endif
1272
1273 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1274 {
1275 int i, j;
1276 ppc_avr_t result = { .u64 = { 0, 0 } };
1277 VECTOR_FOR_INORDER_I(i, u64) {
1278 for (j = 0; j < 8; j++) {
1279 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1280 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1281 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1282 }
1283 }
1284 }
1285 *r = result;
1286 }
1287
1288 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1289 {
1290 int i;
1291 uint64_t perm = 0;
1292
1293 VECTOR_FOR_INORDER_I(i, u8) {
1294 int index = VBPERMQ_INDEX(b, i);
1295
1296 if (index < 128) {
1297 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1298 if (a->u64[VBPERMQ_DW(index)] & mask) {
1299 perm |= (0x8000 >> i);
1300 }
1301 }
1302 }
1303
1304 r->VsrD(0) = perm;
1305 r->VsrD(1) = 0;
1306 }
1307
1308 #undef VBPERMQ_INDEX
1309 #undef VBPERMQ_DW
1310
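/*
 * vpmsum{b,h,w}: each element of a is carry-less (polynomial) multiplied
 * by the corresponding element of b; adjacent pairs of the double-width
 * products are then XORed together to form each destination element.
 */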
1311 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1312 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1313 { \
1314 int i, j; \
1315 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1316 \
1317 VECTOR_FOR_INORDER_I(i, srcfld) { \
1318 prod[i] = 0; \
1319 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1320 if (a->srcfld[i] & (1ull << j)) { \
1321 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1322 } \
1323 } \
1324 } \
1325 \
1326 VECTOR_FOR_INORDER_I(i, trgfld) { \
1327 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1328 } \
1329 }
1330
1331 PMSUM(vpmsumb, u8, u16, uint16_t)
1332 PMSUM(vpmsumh, u16, u32, uint32_t)
1333 PMSUM(vpmsumw, u32, u64, uint64_t)
1334
1335 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1336 {
1337
1338 #ifdef CONFIG_INT128
1339 int i, j;
1340 __uint128_t prod[2];
1341
1342 VECTOR_FOR_INORDER_I(i, u64) {
1343 prod[i] = 0;
1344 for (j = 0; j < 64; j++) {
1345 if (a->u64[i] & (1ull << j)) {
1346 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1347 }
1348 }
1349 }
1350
1351 r->u128 = prod[0] ^ prod[1];
1352
1353 #else
1354 int i, j;
1355 ppc_avr_t prod[2];
1356
1357 VECTOR_FOR_INORDER_I(i, u64) {
1358 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1359 for (j = 0; j < 64; j++) {
1360 if (a->u64[i] & (1ull << j)) {
1361 ppc_avr_t bshift;
1362 if (j == 0) {
1363 bshift.VsrD(0) = 0;
1364 bshift.VsrD(1) = b->u64[i];
1365 } else {
1366 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1367 bshift.VsrD(1) = b->u64[i] << j;
1368 }
1369 prod[i].VsrD(1) ^= bshift.VsrD(1);
1370 prod[i].VsrD(0) ^= bshift.VsrD(0);
1371 }
1372 }
1373 }
1374
1375 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1376 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1377 #endif
1378 }
1379
1380
1381 #if defined(HOST_WORDS_BIGENDIAN)
1382 #define PKBIG 1
1383 #else
1384 #define PKBIG 0
1385 #endif
1386 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1387 {
1388 int i, j;
1389 ppc_avr_t result;
1390 #if defined(HOST_WORDS_BIGENDIAN)
1391 const ppc_avr_t *x[2] = { a, b };
1392 #else
1393 const ppc_avr_t *x[2] = { b, a };
1394 #endif
1395
1396 VECTOR_FOR_INORDER_I(i, u64) {
1397 VECTOR_FOR_INORDER_I(j, u32) {
1398 uint32_t e = x[i]->u32[j];
1399
1400 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1401 ((e >> 6) & 0x3e0) |
1402 ((e >> 3) & 0x1f));
1403 }
1404 }
1405 *r = result;
1406 }
1407
1408 #define VPK(suffix, from, to, cvt, dosat) \
1409 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1410 ppc_avr_t *a, ppc_avr_t *b) \
1411 { \
1412 int i; \
1413 int sat = 0; \
1414 ppc_avr_t result; \
1415 ppc_avr_t *a0 = PKBIG ? a : b; \
1416 ppc_avr_t *a1 = PKBIG ? b : a; \
1417 \
1418 VECTOR_FOR_INORDER_I(i, from) { \
1419 result.to[i] = cvt(a0->from[i], &sat); \
1420 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1421 } \
1422 *r = result; \
1423 if (dosat && sat) { \
1424 set_vscr_sat(env); \
1425 } \
1426 }
1427 #define I(x, y) (x)
1428 VPK(shss, s16, s8, cvtshsb, 1)
1429 VPK(shus, s16, u8, cvtshub, 1)
1430 VPK(swss, s32, s16, cvtswsh, 1)
1431 VPK(swus, s32, u16, cvtswuh, 1)
1432 VPK(sdss, s64, s32, cvtsdsw, 1)
1433 VPK(sdus, s64, u32, cvtsduw, 1)
1434 VPK(uhus, u16, u8, cvtuhub, 1)
1435 VPK(uwus, u32, u16, cvtuwuh, 1)
1436 VPK(udus, u64, u32, cvtuduw, 1)
1437 VPK(uhum, u16, u8, I, 0)
1438 VPK(uwum, u32, u16, I, 0)
1439 VPK(udum, u64, u32, I, 0)
1440 #undef I
1441 #undef VPK
1442 #undef PKBIG
1443
1444 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1445 {
1446 int i;
1447
1448 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1449 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1450 }
1451 }
1452
1453 #define VRFI(suffix, rounding) \
1454 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1455 ppc_avr_t *b) \
1456 { \
1457 int i; \
1458 float_status s = env->vec_status; \
1459 \
1460 set_float_rounding_mode(rounding, &s); \
1461 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1462 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1463 } \
1464 }
1465 VRFI(n, float_round_nearest_even)
1466 VRFI(m, float_round_down)
1467 VRFI(p, float_round_up)
1468 VRFI(z, float_round_to_zero)
1469 #undef VRFI
1470
1471 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1472 {
1473 int i;
1474
1475 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1476 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1477
1478 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1479 }
1480 }
1481
1482 #define VRLMI(name, size, element, insert) \
1483 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1484 { \
1485 int i; \
1486 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1487 uint##size##_t src1 = a->element[i]; \
1488 uint##size##_t src2 = b->element[i]; \
1489 uint##size##_t src3 = r->element[i]; \
1490 uint##size##_t begin, end, shift, mask, rot_val; \
1491 \
1492 shift = extract##size(src2, 0, 6); \
1493 end = extract##size(src2, 8, 6); \
1494 begin = extract##size(src2, 16, 6); \
1495 rot_val = rol##size(src1, shift); \
1496 mask = mask_u##size(begin, end); \
1497 if (insert) { \
1498 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1499 } else { \
1500 r->element[i] = (rot_val & mask); \
1501 } \
1502 } \
1503 }
1504
1505 VRLMI(vrldmi, 64, u64, 1);
1506 VRLMI(vrlwmi, 32, u32, 1);
1507 VRLMI(vrldnm, 64, u64, 0);
1508 VRLMI(vrlwnm, 32, u32, 0);
1509
1510 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1511 ppc_avr_t *c)
1512 {
1513 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1514 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1515 }
1516
1517 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1518 {
1519 int i;
1520
1521 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1522 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1523 }
1524 }
1525
1526 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1527 {
1528 int i;
1529
1530 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1531 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1532 }
1533 }
1534
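/*
 * vextu[bhw][lr]x: extract a byte/halfword/word from vector b at the
 * byte offset given by the low 4 bits of ra, counted from the left (lx)
 * or right (rx) end of the vector, and zero-extend it.
 */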
1535 #define VEXTU_X_DO(name, size, left) \
1536 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1537 { \
1538 int index = (a & 0xf) * 8; \
1539 if (left) { \
1540 index = 128 - index - size; \
1541 } \
1542 return int128_getlo(int128_rshift(b->s128, index)) & \
1543 MAKE_64BIT_MASK(0, size); \
1544 }
1545 VEXTU_X_DO(vextublx, 8, 1)
1546 VEXTU_X_DO(vextuhlx, 16, 1)
1547 VEXTU_X_DO(vextuwlx, 32, 1)
1548 VEXTU_X_DO(vextubrx, 8, 0)
1549 VEXTU_X_DO(vextuhrx, 16, 0)
1550 VEXTU_X_DO(vextuwrx, 32, 0)
1551 #undef VEXTU_X_DO
1552
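/*
 * vslv: each byte of a is shifted left by the amount in the low 3 bits
 * of the corresponding byte of b, with the vacated low bits filled from
 * the next byte, a->VsrB(i + 1).
 */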
1553 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1554 {
1555 int i;
1556 unsigned int shift, bytes, size;
1557
1558 size = ARRAY_SIZE(r->u8);
1559 for (i = 0; i < size; i++) {
1560 shift = b->VsrB(i) & 0x7; /* extract shift value */
1561 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1562 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1563 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1564 }
1565 }
1566
1567 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1568 {
1569 int i;
1570 unsigned int shift, bytes;
1571
1572 /*
1573  * Process in reverse order, as the destination and source register
1574  * can be the same. Since the register is modified in place (saving a
1575  * temporary), reverse order guarantees that computed results are not fed back.
1576 */
1577 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1578 shift = b->VsrB(i) & 0x7; /* extract shift value */
1579 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1580 /* extract adjacent bytes */
1581 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1582 }
1583 }
1584
1585 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1586 {
1587 int sh = shift & 0xf;
1588 int i;
1589 ppc_avr_t result;
1590
1591 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1592 int index = sh + i;
1593 if (index > 0xf) {
1594 result.VsrB(i) = b->VsrB(index - 0x10);
1595 } else {
1596 result.VsrB(i) = a->VsrB(index);
1597 }
1598 }
1599 *r = result;
1600 }
1601
1602 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1603 {
1604 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1605
1606 #if defined(HOST_WORDS_BIGENDIAN)
1607 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1608 memset(&r->u8[16 - sh], 0, sh);
1609 #else
1610 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1611 memset(&r->u8[0], 0, sh);
1612 #endif
1613 }
1614
1615 #if defined(HOST_WORDS_BIGENDIAN)
1616 #define VINSERT(suffix, element) \
1617 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1618 { \
1619 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1620 sizeof(r->element[0])); \
1621 }
1622 #else
1623 #define VINSERT(suffix, element) \
1624 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1625 { \
1626 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1627 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1628 }
1629 #endif
1630 VINSERT(b, u8)
1631 VINSERT(h, u16)
1632 VINSERT(w, u32)
1633 VINSERT(d, u64)
1634 #undef VINSERT
1635 #if defined(HOST_WORDS_BIGENDIAN)
1636 #define VEXTRACT(suffix, element) \
1637 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1638 { \
1639 uint32_t es = sizeof(r->element[0]); \
1640 memmove(&r->u8[8 - es], &b->u8[index], es); \
1641 memset(&r->u8[8], 0, 8); \
1642 memset(&r->u8[0], 0, 8 - es); \
1643 }
1644 #else
1645 #define VEXTRACT(suffix, element) \
1646 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1647 { \
1648 uint32_t es = sizeof(r->element[0]); \
1649 uint32_t s = (16 - index) - es; \
1650 memmove(&r->u8[8], &b->u8[s], es); \
1651 memset(&r->u8[0], 0, 8); \
1652 memset(&r->u8[8 + es], 0, 8 - es); \
1653 }
1654 #endif
1655 VEXTRACT(ub, u8)
1656 VEXTRACT(uh, u16)
1657 VEXTRACT(uw, u32)
1658 VEXTRACT(d, u64)
1659 #undef VEXTRACT
1660
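/*
 * xxextractuw: copy the word at byte offset 'index' (wrapping modulo 16)
 * of xb into word element 1 of xt (bytes 4-7), zeroing the other bytes.
 */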
1661 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1662 ppc_vsr_t *xb, uint32_t index)
1663 {
1664 ppc_vsr_t t = { };
1665 size_t es = sizeof(uint32_t);
1666 uint32_t ext_index;
1667 int i;
1668
1669 ext_index = index;
1670 for (i = 0; i < es; i++, ext_index++) {
1671 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1672 }
1673
1674 *xt = t;
1675 }
1676
1677 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1678 ppc_vsr_t *xb, uint32_t index)
1679 {
1680 ppc_vsr_t t = *xt;
1681 size_t es = sizeof(uint32_t);
1682 int ins_index, i = 0;
1683
1684 ins_index = index;
1685 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1686 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1687 }
1688
1689 *xt = t;
1690 }
1691
1692 #define VEXT_SIGNED(name, element, cast) \
1693 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1694 { \
1695 int i; \
1696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1697 r->element[i] = (cast)b->element[i]; \
1698 } \
1699 }
1700 VEXT_SIGNED(vextsb2w, s32, int8_t)
1701 VEXT_SIGNED(vextsb2d, s64, int8_t)
1702 VEXT_SIGNED(vextsh2w, s32, int16_t)
1703 VEXT_SIGNED(vextsh2d, s64, int16_t)
1704 VEXT_SIGNED(vextsw2d, s64, int32_t)
1705 #undef VEXT_SIGNED
1706
1707 #define VNEG(name, element) \
1708 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1709 { \
1710 int i; \
1711 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1712 r->element[i] = -b->element[i]; \
1713 } \
1714 }
1715 VNEG(vnegw, s32)
1716 VNEG(vnegd, s64)
1717 #undef VNEG
1718
1719 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1720 {
1721 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1722
1723 #if defined(HOST_WORDS_BIGENDIAN)
1724 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1725 memset(&r->u8[0], 0, sh);
1726 #else
1727 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1728 memset(&r->u8[16 - sh], 0, sh);
1729 #endif
1730 }
1731
1732 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1733 {
1734 int i;
1735
1736 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1737 r->u32[i] = a->u32[i] >= b->u32[i];
1738 }
1739 }
1740
1741 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1742 {
1743 int64_t t;
1744 int i, upper;
1745 ppc_avr_t result;
1746 int sat = 0;
1747
1748 upper = ARRAY_SIZE(r->s32) - 1;
1749 t = (int64_t)b->VsrSW(upper);
1750 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1751 t += a->VsrSW(i);
1752 result.VsrSW(i) = 0;
1753 }
1754 result.VsrSW(upper) = cvtsdsw(t, &sat);
1755 *r = result;
1756
1757 if (sat) {
1758 set_vscr_sat(env);
1759 }
1760 }
1761
1762 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1763 {
1764 int i, j, upper;
1765 ppc_avr_t result;
1766 int sat = 0;
1767
1768 upper = 1;
1769 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1770 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1771
1772 result.VsrD(i) = 0;
1773 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1774 t += a->VsrSW(2 * i + j);
1775 }
1776 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1777 }
1778
1779 *r = result;
1780 if (sat) {
1781 set_vscr_sat(env);
1782 }
1783 }
1784
1785 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1786 {
1787 int i, j;
1788 int sat = 0;
1789
1790 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1791 int64_t t = (int64_t)b->s32[i];
1792
1793 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1794 t += a->s8[4 * i + j];
1795 }
1796 r->s32[i] = cvtsdsw(t, &sat);
1797 }
1798
1799 if (sat) {
1800 set_vscr_sat(env);
1801 }
1802 }
1803
1804 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1805 {
1806 int sat = 0;
1807 int i;
1808
1809 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1810 int64_t t = (int64_t)b->s32[i];
1811
1812 t += a->s16[2 * i] + a->s16[2 * i + 1];
1813 r->s32[i] = cvtsdsw(t, &sat);
1814 }
1815
1816 if (sat) {
1817 set_vscr_sat(env);
1818 }
1819 }
1820
1821 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1822 {
1823 int i, j;
1824 int sat = 0;
1825
1826 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1827 uint64_t t = (uint64_t)b->u32[i];
1828
1829 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1830 t += a->u8[4 * i + j];
1831 }
1832 r->u32[i] = cvtuduw(t, &sat);
1833 }
1834
1835 if (sat) {
1836 set_vscr_sat(env);
1837 }
1838 }
1839
1840 #if defined(HOST_WORDS_BIGENDIAN)
1841 #define UPKHI 1
1842 #define UPKLO 0
1843 #else
1844 #define UPKHI 0
1845 #define UPKLO 1
1846 #endif
1847 #define VUPKPX(suffix, hi) \
1848 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1849 { \
1850 int i; \
1851 ppc_avr_t result; \
1852 \
1853 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1854 uint16_t e = b->u16[hi ? i : i + 4]; \
1855 uint8_t a = (e >> 15) ? 0xff : 0; \
1856 uint8_t r = (e >> 10) & 0x1f; \
1857 uint8_t g = (e >> 5) & 0x1f; \
1858 uint8_t b = e & 0x1f; \
1859 \
1860 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1861 } \
1862 *r = result; \
1863 }
1864 VUPKPX(lpx, UPKLO)
1865 VUPKPX(hpx, UPKHI)
1866 #undef VUPKPX
1867
1868 #define VUPK(suffix, unpacked, packee, hi) \
1869 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1870 { \
1871 int i; \
1872 ppc_avr_t result; \
1873 \
1874 if (hi) { \
1875 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1876 result.unpacked[i] = b->packee[i]; \
1877 } \
1878 } else { \
1879 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1880 i++) { \
1881 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1882 } \
1883 } \
1884 *r = result; \
1885 }
1886 VUPK(hsb, s16, s8, UPKHI)
1887 VUPK(hsh, s32, s16, UPKHI)
1888 VUPK(hsw, s64, s32, UPKHI)
1889 VUPK(lsb, s16, s8, UPKLO)
1890 VUPK(lsh, s32, s16, UPKLO)
1891 VUPK(lsw, s64, s32, UPKLO)
1892 #undef VUPK
1893 #undef UPKHI
1894 #undef UPKLO
1895
1896 #define VGENERIC_DO(name, element) \
1897 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1898 { \
1899 int i; \
1900 \
1901 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1902 r->element[i] = name(b->element[i]); \
1903 } \
1904 }
1905
1906 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1907 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1908
1909 VGENERIC_DO(clzb, u8)
1910 VGENERIC_DO(clzh, u16)
1911
1912 #undef clzb
1913 #undef clzh
1914
1915 #define ctzb(v) ((v) ? ctz32(v) : 8)
1916 #define ctzh(v) ((v) ? ctz32(v) : 16)
1917 #define ctzw(v) ctz32((v))
1918 #define ctzd(v) ctz64((v))
1919
1920 VGENERIC_DO(ctzb, u8)
1921 VGENERIC_DO(ctzh, u16)
1922 VGENERIC_DO(ctzw, u32)
1923 VGENERIC_DO(ctzd, u64)
1924
1925 #undef ctzb
1926 #undef ctzh
1927 #undef ctzw
1928 #undef ctzd
1929
1930 #define popcntb(v) ctpop8(v)
1931 #define popcnth(v) ctpop16(v)
1932 #define popcntw(v) ctpop32(v)
1933 #define popcntd(v) ctpop64(v)
1934
1935 VGENERIC_DO(popcntb, u8)
1936 VGENERIC_DO(popcnth, u16)
1937 VGENERIC_DO(popcntw, u32)
1938 VGENERIC_DO(popcntd, u64)
1939
1940 #undef popcntb
1941 #undef popcnth
1942 #undef popcntw
1943 #undef popcntd
1944
1945 #undef VGENERIC_DO
1946
1947 #if defined(HOST_WORDS_BIGENDIAN)
1948 #define QW_ONE { .u64 = { 0, 1 } }
1949 #else
1950 #define QW_ONE { .u64 = { 1, 0 } }
1951 #endif
1952
1953 #ifndef CONFIG_INT128
1954
1955 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1956 {
1957 t->u64[0] = ~a.u64[0];
1958 t->u64[1] = ~a.u64[1];
1959 }
1960
1961 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1962 {
1963 if (a.VsrD(0) < b.VsrD(0)) {
1964 return -1;
1965 } else if (a.VsrD(0) > b.VsrD(0)) {
1966 return 1;
1967 } else if (a.VsrD(1) < b.VsrD(1)) {
1968 return -1;
1969 } else if (a.VsrD(1) > b.VsrD(1)) {
1970 return 1;
1971 } else {
1972 return 0;
1973 }
1974 }
1975
1976 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1977 {
1978 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1979 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1980 (~a.VsrD(1) < b.VsrD(1));
1981 }
1982
1983 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1984 {
1985 ppc_avr_t not_a;
1986 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1987 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1988 (~a.VsrD(1) < b.VsrD(1));
1989 avr_qw_not(&not_a, a);
1990 return avr_qw_cmpu(not_a, b) < 0;
1991 }
1992
1993 #endif
1994
1995 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1996 {
1997 #ifdef CONFIG_INT128
1998 r->u128 = a->u128 + b->u128;
1999 #else
2000 avr_qw_add(r, *a, *b);
2001 #endif
2002 }
2003
2004 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2005 {
2006 #ifdef CONFIG_INT128
2007 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2008 #else
2009
2010 if (c->VsrD(1) & 1) {
2011 ppc_avr_t tmp;
2012
2013 tmp.VsrD(0) = 0;
2014 tmp.VsrD(1) = c->VsrD(1) & 1;
2015 avr_qw_add(&tmp, *a, tmp);
2016 avr_qw_add(r, tmp, *b);
2017 } else {
2018 avr_qw_add(r, *a, *b);
2019 }
2020 #endif
2021 }
2022
2023 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2024 {
2025 #ifdef CONFIG_INT128
2026 r->u128 = (~a->u128 < b->u128);
2027 #else
2028 ppc_avr_t not_a;
2029
2030 avr_qw_not(&not_a, *a);
2031
2032 r->VsrD(0) = 0;
2033 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2034 #endif
2035 }
2036
2037 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2038 {
2039 #ifdef CONFIG_INT128
2040 int carry_out = (~a->u128 < b->u128);
2041 if (!carry_out && (c->u128 & 1)) {
2042 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2043 ((a->u128 != 0) || (b->u128 != 0));
2044 }
2045 r->u128 = carry_out;
2046 #else
2047
2048 int carry_in = c->VsrD(1) & 1;
2049 int carry_out = 0;
2050 ppc_avr_t tmp;
2051
2052 carry_out = avr_qw_addc(&tmp, *a, *b);
2053
2054 if (!carry_out && carry_in) {
2055 ppc_avr_t one = QW_ONE;
2056 carry_out = avr_qw_addc(&tmp, tmp, one);
2057 }
2058 r->VsrD(0) = 0;
2059 r->VsrD(1) = carry_out;
2060 #endif
2061 }
2062
2063 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2064 {
2065 #ifdef CONFIG_INT128
2066 r->u128 = a->u128 - b->u128;
2067 #else
2068 ppc_avr_t tmp;
2069 ppc_avr_t one = QW_ONE;
2070
2071 avr_qw_not(&tmp, *b);
2072 avr_qw_add(&tmp, *a, tmp);
2073 avr_qw_add(r, tmp, one);
2074 #endif
2075 }
2076
2077 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2078 {
2079 #ifdef CONFIG_INT128
2080 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2081 #else
2082 ppc_avr_t tmp, sum;
2083
2084 avr_qw_not(&tmp, *b);
2085 avr_qw_add(&sum, *a, tmp);
2086
2087 tmp.VsrD(0) = 0;
2088 tmp.VsrD(1) = c->VsrD(1) & 1;
2089 avr_qw_add(r, sum, tmp);
2090 #endif
2091 }
2092
2093 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2094 {
2095 #ifdef CONFIG_INT128
2096 r->u128 = (~a->u128 < ~b->u128) ||
2097 (a->u128 + ~b->u128 == (__uint128_t)-1);
2098 #else
2099 int carry = (avr_qw_cmpu(*a, *b) > 0);
2100 if (!carry) {
2101 ppc_avr_t tmp;
2102 avr_qw_not(&tmp, *b);
2103 avr_qw_add(&tmp, *a, tmp);
2104 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2105 }
2106 r->VsrD(0) = 0;
2107 r->VsrD(1) = carry;
2108 #endif
2109 }
2110
2111 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2112 {
2113 #ifdef CONFIG_INT128
2114 r->u128 =
2115 (~a->u128 < ~b->u128) ||
2116 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2117 #else
2118 int carry_in = c->VsrD(1) & 1;
2119 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2120 if (!carry_out && carry_in) {
2121 ppc_avr_t tmp;
2122 avr_qw_not(&tmp, *b);
2123 avr_qw_add(&tmp, *a, tmp);
2124 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2125 }
2126
2127 r->VsrD(0) = 0;
2128 r->VsrD(1) = carry_out;
2129 #endif
2130 }
2131
2132 #define BCD_PLUS_PREF_1 0xC
2133 #define BCD_PLUS_PREF_2 0xF
2134 #define BCD_PLUS_ALT_1 0xA
2135 #define BCD_NEG_PREF 0xD
2136 #define BCD_NEG_ALT 0xB
2137 #define BCD_PLUS_ALT_2 0xE
2138 #define NATIONAL_PLUS 0x2B
2139 #define NATIONAL_NEG 0x2D
2140
2141 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2142
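/*
 * Packed BCD layout: 31 decimal digits plus a sign code in the low
 * nibble of the least significant byte (digit index 0).  BCD_DIG_BYTE(n)
 * maps digit n to the byte that holds it.
 */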
2143 static int bcd_get_sgn(ppc_avr_t *bcd)
2144 {
2145 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2146 case BCD_PLUS_PREF_1:
2147 case BCD_PLUS_PREF_2:
2148 case BCD_PLUS_ALT_1:
2149 case BCD_PLUS_ALT_2:
2150 {
2151 return 1;
2152 }
2153
2154 case BCD_NEG_PREF:
2155 case BCD_NEG_ALT:
2156 {
2157 return -1;
2158 }
2159
2160 default:
2161 {
2162 return 0;
2163 }
2164 }
2165 }
2166
2167 static int bcd_preferred_sgn(int sgn, int ps)
2168 {
2169 if (sgn >= 0) {
2170 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2171 } else {
2172 return BCD_NEG_PREF;
2173 }
2174 }
2175
2176 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2177 {
2178 uint8_t result;
2179 if (n & 1) {
2180 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2181 } else {
2182 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2183 }
2184
2185 if (unlikely(result > 9)) {
2186 *invalid = true;
2187 }
2188 return result;
2189 }
2190
2191 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2192 {
2193 if (n & 1) {
2194 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2195 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2196 } else {
2197 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2198 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2199 }
2200 }
2201
2202 static bool bcd_is_valid(ppc_avr_t *bcd)
2203 {
2204 int i;
2205 int invalid = 0;
2206
2207 if (bcd_get_sgn(bcd) == 0) {
2208 return false;
2209 }
2210
2211 for (i = 1; i < 32; i++) {
2212 bcd_get_digit(bcd, i, &invalid);
2213 if (unlikely(invalid)) {
2214 return false;
2215 }
2216 }
2217 return true;
2218 }
2219
2220 static int bcd_cmp_zero(ppc_avr_t *bcd)
2221 {
2222 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2223 return CRF_EQ;
2224 } else {
2225 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2226 }
2227 }
2228
2229 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2230 {
2231 return reg->VsrH(7 - n);
2232 }
2233
2234 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2235 {
2236 reg->VsrH(7 - n) = val;
2237 }
2238
2239 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2240 {
2241 int i;
2242 int invalid = 0;
2243 for (i = 31; i > 0; i--) {
2244 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2245 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2246 if (unlikely(invalid)) {
2247 return 0; /* doesn't matter */
2248 } else if (dig_a > dig_b) {
2249 return 1;
2250 } else if (dig_a < dig_b) {
2251 return -1;
2252 }
2253 }
2254
2255 return 0;
2256 }
2257
2258 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2259 int *overflow)
2260 {
2261 int carry = 0;
2262 int i;
2263 int is_zero = 1;
2264
2265 for (i = 1; i <= 31; i++) {
2266 uint8_t digit = bcd_get_digit(a, i, invalid) +
2267 bcd_get_digit(b, i, invalid) + carry;
2268 is_zero &= (digit == 0);
2269 if (digit > 9) {
2270 carry = 1;
2271 digit -= 10;
2272 } else {
2273 carry = 0;
2274 }
2275
2276 bcd_put_digit(t, digit, i);
2277 }
2278
2279 *overflow = carry;
2280 return is_zero;
2281 }
2282
2283 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2284 int *overflow)
2285 {
2286 int carry = 0;
2287 int i;
2288
2289 for (i = 1; i <= 31; i++) {
2290 uint8_t digit = bcd_get_digit(a, i, invalid) -
2291 bcd_get_digit(b, i, invalid) + carry;
2292 if (digit & 0x80) {
2293 carry = -1;
2294 digit += 10;
2295 } else {
2296 carry = 0;
2297 }
2298
2299 bcd_put_digit(t, digit, i);
2300 }
2301
2302 *overflow = carry;
2303 }
2304
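/*
 * bcdadd. - signed packed-decimal addition of a and b, with the preferred
 * sign code selected by ps.  The result is stored in r; the return value is
 * the CR field (LT/GT/EQ by sign of the result, SO on overflow or invalid
 * input digits).
 */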
2305 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2306 {
2307
2308 int sgna = bcd_get_sgn(a);
2309 int sgnb = bcd_get_sgn(b);
2310 int invalid = (sgna == 0) || (sgnb == 0);
2311 int overflow = 0;
2312 int zero = 0;
2313 uint32_t cr = 0;
2314 ppc_avr_t result = { .u64 = { 0, 0 } };
2315
2316 if (!invalid) {
2317 if (sgna == sgnb) {
2318 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2319 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2320 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2321 } else {
2322 int magnitude = bcd_cmp_mag(a, b);
2323 if (magnitude > 0) {
2324 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2325 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2326 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2327 } else if (magnitude < 0) {
2328 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2329 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2330 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2331 } else {
2332 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2333 cr = CRF_EQ;
2334 }
2335 }
2336 }
2337
2338 if (unlikely(invalid)) {
2339 result.VsrD(0) = result.VsrD(1) = -1;
2340 cr = CRF_SO;
2341 } else if (overflow) {
2342 cr |= CRF_SO;
2343 } else if (zero) {
2344 cr |= CRF_EQ;
2345 }
2346
2347 *r = result;
2348
2349 return cr;
2350 }
2351
2352 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2353 {
2354 ppc_avr_t bcopy = *b;
2355 int sgnb = bcd_get_sgn(b);
2356 if (sgnb < 0) {
2357 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2358 } else if (sgnb > 0) {
2359 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2360 }
2361 /* else invalid ... defer to bcdadd code for proper handling */
2362
2363 return helper_bcdadd(r, a, &bcopy, ps);
2364 }
2365
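/*
 * bcdcfn. - convert from national decimal format (16-bit digit characters
 * 0x0030..0x0039 with the sign code, 0x002B or 0x002D, in the rightmost
 * halfword) to signed packed decimal.
 */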
2366 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2367 {
2368 int i;
2369 int cr = 0;
2370 uint16_t national = 0;
2371 uint16_t sgnb = get_national_digit(b, 0);
2372 ppc_avr_t ret = { .u64 = { 0, 0 } };
2373 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2374
2375 for (i = 1; i < 8; i++) {
2376 national = get_national_digit(b, i);
2377 if (unlikely(national < 0x30 || national > 0x39)) {
2378 invalid = 1;
2379 break;
2380 }
2381
2382 bcd_put_digit(&ret, national & 0xf, i);
2383 }
2384
2385 if (sgnb == NATIONAL_PLUS) {
2386 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2387 } else {
2388 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2389 }
2390
2391 cr = bcd_cmp_zero(&ret);
2392
2393 if (unlikely(invalid)) {
2394 cr = CRF_SO;
2395 }
2396
2397 *r = ret;
2398
2399 return cr;
2400 }
2401
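/*
 * bcdctn. - convert from signed packed decimal to national decimal format.
 * Only the low seven digits fit; any nonzero digit above that sets SO in the
 * returned CR value.
 */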
2402 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2403 {
2404 int i;
2405 int cr = 0;
2406 int sgnb = bcd_get_sgn(b);
2407 int invalid = (sgnb == 0);
2408 ppc_avr_t ret = { .u64 = { 0, 0 } };
2409
2410 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2411
2412 for (i = 1; i < 8; i++) {
2413 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2414
2415 if (unlikely(invalid)) {
2416 break;
2417 }
2418 }
2419 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2420
2421 cr = bcd_cmp_zero(b);
2422
2423 if (ox_flag) {
2424 cr |= CRF_SO;
2425 }
2426
2427 if (unlikely(invalid)) {
2428 cr = CRF_SO;
2429 }
2430
2431 *r = ret;
2432
2433 return cr;
2434 }
2435
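/*
 * bcdcfz. - convert from zoned decimal format (one digit per byte, zone
 * nibble 0x3, or 0xF when ps is set, with the sign encoded in the zone of
 * the rightmost byte) to signed packed decimal.
 */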
2436 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2437 {
2438 int i;
2439 int cr = 0;
2440 int invalid = 0;
2441 int zone_digit = 0;
2442 int zone_lead = ps ? 0xF : 0x3;
2443 int digit = 0;
2444 ppc_avr_t ret = { .u64 = { 0, 0 } };
2445 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2446
2447 if (unlikely((sgnb < 0xA) && ps)) {
2448 invalid = 1;
2449 }
2450
2451 for (i = 0; i < 16; i++) {
2452 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2453 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2454 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2455 invalid = 1;
2456 break;
2457 }
2458
2459 bcd_put_digit(&ret, digit, i + 1);
2460 }
2461
2462 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2463 (!ps && (sgnb & 0x4))) {
2464 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2465 } else {
2466 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2467 }
2468
2469 cr = bcd_cmp_zero(&ret);
2470
2471 if (unlikely(invalid)) {
2472 cr = CRF_SO;
2473 }
2474
2475 *r = ret;
2476
2477 return cr;
2478 }
2479
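/*
 * bcdctz. - convert from signed packed decimal to zoned decimal format.
 * Only 16 digits fit in the zoned result; nonzero higher digits set SO in
 * the returned CR value.
 */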
2480 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2481 {
2482 int i;
2483 int cr = 0;
2484 uint8_t digit = 0;
2485 int sgnb = bcd_get_sgn(b);
2486 int zone_lead = (ps) ? 0xF0 : 0x30;
2487 int invalid = (sgnb == 0);
2488 ppc_avr_t ret = { .u64 = { 0, 0 } };
2489
2490 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2491
2492 for (i = 0; i < 16; i++) {
2493 digit = bcd_get_digit(b, i + 1, &invalid);
2494
2495 if (unlikely(invalid)) {
2496 break;
2497 }
2498
2499 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2500 }
2501
2502 if (ps) {
2503 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2504 } else {
2505 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2506 }
2507
2508 cr = bcd_cmp_zero(b);
2509
2510 if (ox_flag) {
2511 cr |= CRF_SO;
2512 }
2513
2514 if (unlikely(invalid)) {
2515 cr = CRF_SO;
2516 }
2517
2518 *r = ret;
2519
2520 return cr;
2521 }
2522
2523 /**
2524 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2525 *
2526 * Returns:
2527 * > 0 if ahi|alo > bhi|blo,
2528 * 0 if ahi|alo == bhi|blo,
2529 * < 0 if ahi|alo < bhi|blo
2530 */
2531 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2532 uint64_t blo, uint64_t bhi)
2533 {
2534 return (ahi == bhi) ?
2535 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2536 (ahi > bhi ? 1 : -1);
2537 }
2538
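/*
 * bcdcfsq. - convert a signed 128-bit binary integer to signed packed
 * decimal.  Values whose magnitude does not fit in 31 digits leave r
 * unchanged and set SO in the returned CR value.
 */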
2539 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2540 {
2541 int i;
2542 int cr;
2543 uint64_t lo_value;
2544 uint64_t hi_value;
2545 uint64_t rem;
2546 ppc_avr_t ret = { .u64 = { 0, 0 } };
2547
2548 if (b->VsrSD(0) < 0) {
2549 lo_value = -b->VsrSD(1);
2550 hi_value = ~b->VsrD(0) + !lo_value;
2551 bcd_put_digit(&ret, 0xD, 0);
2552
2553 cr = CRF_LT;
2554 } else {
2555 lo_value = b->VsrD(1);
2556 hi_value = b->VsrD(0);
2557 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2558
2559 if (hi_value == 0 && lo_value == 0) {
2560 cr = CRF_EQ;
2561 } else {
2562 cr = CRF_GT;
2563 }
2564 }
2565
2566 /*
2567 * Check src limits: abs(src) <= 10^31 - 1
2568 *
2569 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2570 */
2571 if (ucmp128(lo_value, hi_value,
2572 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2573 cr |= CRF_SO;
2574
2575 /*
2576 * According to the ISA, if src wouldn't fit in the destination
2577 * register, the result is undefined.
2578 * In that case, we leave r unchanged.
2579 */
2580 } else {
2581 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2582
2583 for (i = 1; i < 16; rem /= 10, i++) {
2584 bcd_put_digit(&ret, rem % 10, i);
2585 }
2586
2587 for (; i < 32; lo_value /= 10, i++) {
2588 bcd_put_digit(&ret, lo_value % 10, i);
2589 }
2590
2591 *r = ret;
2592 }
2593
2594 return cr;
2595 }
2596
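/*
 * bcdctsq. - convert a signed packed-decimal value to a signed 128-bit
 * binary integer, accumulating the digits from most to least significant.
 * Invalid digits or sign codes yield CRF_SO.
 */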
2597 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2598 {
2599 uint8_t i;
2600 int cr;
2601 uint64_t carry;
2602 uint64_t unused;
2603 uint64_t lo_value;
2604 uint64_t hi_value = 0;
2605 int sgnb = bcd_get_sgn(b);
2606 int invalid = (sgnb == 0);
2607
2608 lo_value = bcd_get_digit(b, 31, &invalid);
2609 for (i = 30; i > 0; i--) {
2610 mulu64(&lo_value, &carry, lo_value, 10ULL);
2611 mulu64(&hi_value, &unused, hi_value, 10ULL);
2612 lo_value += bcd_get_digit(b, i, &invalid);
2613 hi_value += carry;
2614
2615 if (unlikely(invalid)) {
2616 break;
2617 }
2618 }
2619
2620 if (sgnb == -1) {
2621 r->VsrSD(1) = -lo_value;
2622 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2623 } else {
2624 r->VsrSD(1) = lo_value;
2625 r->VsrSD(0) = hi_value;
2626 }
2627
2628 cr = bcd_cmp_zero(b);
2629
2630 if (unlikely(invalid)) {
2631 cr = CRF_SO;
2632 }
2633
2634 return cr;
2635 }
2636
2637 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2638 {
2639 int i;
2640 int invalid = 0;
2641
2642 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2643 return CRF_SO;
2644 }
2645
2646 *r = *a;
2647 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2648
2649 for (i = 1; i < 32; i++) {
2650 bcd_get_digit(a, i, &invalid);
2651 bcd_get_digit(b, i, &invalid);
2652 if (unlikely(invalid)) {
2653 return CRF_SO;
2654 }
2655 }
2656
2657 return bcd_cmp_zero(r);
2658 }
2659
2660 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2661 {
2662 int sgnb = bcd_get_sgn(b);
2663
2664 *r = *b;
2665 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2666
2667 if (bcd_is_valid(b) == false) {
2668 return CRF_SO;
2669 }
2670
2671 return bcd_cmp_zero(r);
2672 }
2673
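/*
 * bcds. - decimal shift: shift the digits of b left (positive count) or
 * right (negative count) by the signed shift count supplied in a; the sign
 * nibble is re-encoded using the preferred sign code.  Nonzero digits
 * shifted out on a left shift set SO.
 */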
2674 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2675 {
2676 int cr;
2677 int i = a->VsrSB(7);
2678 bool ox_flag = false;
2679 int sgnb = bcd_get_sgn(b);
2680 ppc_avr_t ret = *b;
2681 ret.VsrD(1) &= ~0xf;
2682
2683 if (bcd_is_valid(b) == false) {
2684 return CRF_SO;
2685 }
2686
2687 if (unlikely(i > 31)) {
2688 i = 31;
2689 } else if (unlikely(i < -31)) {
2690 i = -31;
2691 }
2692
2693 if (i > 0) {
2694 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2695 } else {
2696 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2697 }
2698 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2699
2700 *r = ret;
2701
2702 cr = bcd_cmp_zero(r);
2703 if (ox_flag) {
2704 cr |= CRF_SO;
2705 }
2706
2707 return cr;
2708 }
2709
2710 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2711 {
2712 int cr;
2713 int i;
2714 int invalid = 0;
2715 bool ox_flag = false;
2716 ppc_avr_t ret = *b;
2717
2718 for (i = 0; i < 32; i++) {
2719 bcd_get_digit(b, i, &invalid);
2720
2721 if (unlikely(invalid)) {
2722 return CRF_SO;
2723 }
2724 }
2725
2726 i = a->VsrSB(7);
2727 if (i >= 32) {
2728 ox_flag = true;
2729 ret.VsrD(1) = ret.VsrD(0) = 0;
2730 } else if (i <= -32) {
2731 ret.VsrD(1) = ret.VsrD(0) = 0;
2732 } else if (i > 0) {
2733 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2734 } else {
2735 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2736 }
2737 *r = ret;
2738
2739 cr = bcd_cmp_zero(r);
2740 if (ox_flag) {
2741 cr |= CRF_SO;
2742 }
2743
2744 return cr;
2745 }
2746
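/*
 * bcdsr. - decimal shift and round: like bcds., but a right shift rounds
 * the result, adding one when the most significant digit shifted out is 5
 * or greater.
 */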
2747 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2748 {
2749 int cr;
2750 int unused = 0;
2751 int invalid = 0;
2752 bool ox_flag = false;
2753 int sgnb = bcd_get_sgn(b);
2754 ppc_avr_t ret = *b;
2755 ret.VsrD(1) &= ~0xf;
2756
2757 int i = a->VsrSB(7);
2758 ppc_avr_t bcd_one;
2759
2760 bcd_one.VsrD(0) = 0;
2761 bcd_one.VsrD(1) = 0x10;
2762
2763 if (bcd_is_valid(b) == false) {
2764 return CRF_SO;
2765 }
2766
2767 if (unlikely(i > 31)) {
2768 i = 31;
2769 } else if (unlikely(i < -31)) {
2770 i = -31;
2771 }
2772
2773 if (i > 0) {
2774 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2775 } else {
2776 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2777
2778 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2779 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2780 }
2781 }
2782 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2783
2784 cr = bcd_cmp_zero(&ret);
2785 if (ox_flag) {
2786 cr |= CRF_SO;
2787 }
2788 *r = ret;
2789
2790 return cr;
2791 }
2792
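/*
 * bcdtrunc. - truncate a signed packed-decimal value to the digit count
 * given in a, setting SO in the returned CR value if nonzero digits were
 * discarded.  bcdutrunc. below is the unsigned variant.
 */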
2793 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2794 {
2795 uint64_t mask;
2796 uint32_t ox_flag = 0;
2797 int i = a->VsrSH(3) + 1;
2798 ppc_avr_t ret = *b;
2799
2800 if (bcd_is_valid(b) == false) {
2801 return CRF_SO;
2802 }
2803
2804 if (i > 16 && i < 32) {
2805 mask = (uint64_t)-1 >> (128 - i * 4);
2806 if (ret.VsrD(0) & ~mask) {
2807 ox_flag = CRF_SO;
2808 }
2809
2810 ret.VsrD(0) &= mask;
2811 } else if (i >= 0 && i <= 16) {
2812 mask = (uint64_t)-1 >> (64 - i * 4);
2813 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2814 ox_flag = CRF_SO;
2815 }
2816
2817 ret.VsrD(1) &= mask;
2818 ret.VsrD(0) = 0;
2819 }
2820 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2821 *r = ret;
2822
2823 return bcd_cmp_zero(&ret) | ox_flag;
2824 }
2825
2826 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2827 {
2828 int i;
2829 uint64_t mask;
2830 uint32_t ox_flag = 0;
2831 int invalid = 0;
2832 ppc_avr_t ret = *b;
2833
2834 for (i = 0; i < 32; i++) {
2835 bcd_get_digit(b, i, &invalid);
2836
2837 if (unlikely(invalid)) {
2838 return CRF_SO;
2839 }
2840 }
2841
2842 i = a->VsrSH(3);
2843 if (i > 16 && i < 33) {
2844 mask = (uint64_t)-1 >> (128 - i * 4);
2845 if (ret.VsrD(0) & ~mask) {
2846 ox_flag = CRF_SO;
2847 }
2848
2849 ret.VsrD(0) &= mask;
2850 } else if (i > 0 && i <= 16) {
2851 mask = (uint64_t)-1 >> (64 - i * 4);
2852 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2853 ox_flag = CRF_SO;
2854 }
2855
2856 ret.VsrD(1) &= mask;
2857 ret.VsrD(0) = 0;
2858 } else if (i == 0) {
2859 if (ret.VsrD(0) || ret.VsrD(1)) {
2860 ox_flag = CRF_SO;
2861 }
2862 ret.VsrD(0) = ret.VsrD(1) = 0;
2863 }
2864
2865 *r = ret;
2866 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2867 return ox_flag | CRF_EQ;
2868 }
2869
2870 return ox_flag | CRF_GT;
2871 }
2872
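/*
 * AES helpers: vsbox applies the AES S-box to each byte of a;
 * vcipher/vcipherlast perform one AES encryption round on a with round key
 * b (the "last" form omits MixColumns), and vncipher/vncipherlast are the
 * corresponding inverse-cipher rounds.
 */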
2873 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2874 {
2875 int i;
2876 VECTOR_FOR_INORDER_I(i, u8) {
2877 r->u8[i] = AES_sbox[a->u8[i]];
2878 }
2879 }
2880
2881 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2882 {
2883 ppc_avr_t result;
2884 int i;
2885
2886 VECTOR_FOR_INORDER_I(i, u32) {
2887 result.VsrW(i) = b->VsrW(i) ^
2888 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2889 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2890 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2891 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2892 }
2893 *r = result;
2894 }
2895
2896 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2897 {
2898 ppc_avr_t result;
2899 int i;
2900
2901 VECTOR_FOR_INORDER_I(i, u8) {
2902 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2903 }
2904 *r = result;
2905 }
2906
2907 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2908 {
2909 /* This differs from what is written in ISA V2.07: the RTL there is
2910 * incorrect and will be fixed in V2.07B. */
2911 int i;
2912 ppc_avr_t tmp;
2913
2914 VECTOR_FOR_INORDER_I(i, u8) {
2915 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2916 }
2917
2918 VECTOR_FOR_INORDER_I(i, u32) {
2919 r->VsrW(i) =
2920 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2921 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2922 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2923 AES_imc[tmp.VsrB(4 * i + 3)][3];
2924 }
2925 }
2926
2927 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2928 {
2929 ppc_avr_t result;
2930 int i;
2931
2932 VECTOR_FOR_INORDER_I(i, u8) {
2933 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2934 }
2935 *r = result;
2936 }
2937
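/*
 * vshasigmaw/vshasigmad - SHA-256 (word) and SHA-512 (doubleword) sigma
 * functions.  st selects between the lower-case sigma functions used in the
 * message schedule and the upper-case Sigma functions used in the
 * compression rounds; the relevant bits of six choose, per element, whether
 * the 0 or 1 variant is applied.
 */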
2938 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2939 {
2940 int st = (st_six & 0x10) != 0;
2941 int six = st_six & 0xF;
2942 int i;
2943
2944 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2945 if (st == 0) {
2946 if ((six & (0x8 >> i)) == 0) {
2947 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2948 ror32(a->VsrW(i), 18) ^
2949 (a->VsrW(i) >> 3);
2950 } else { /* six.bit[i] == 1 */
2951 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2952 ror32(a->VsrW(i), 19) ^
2953 (a->VsrW(i) >> 10);
2954 }
2955 } else { /* st == 1 */
2956 if ((six & (0x8 >> i)) == 0) {
2957 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2958 ror32(a->VsrW(i), 13) ^
2959 ror32(a->VsrW(i), 22);
2960 } else { /* six.bit[i] == 1 */
2961 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2962 ror32(a->VsrW(i), 11) ^
2963 ror32(a->VsrW(i), 25);
2964 }
2965 }
2966 }
2967 }
2968
2969 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2970 {
2971 int st = (st_six & 0x10) != 0;
2972 int six = st_six & 0xF;
2973 int i;
2974
2975 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2976 if (st == 0) {
2977 if ((six & (0x8 >> (2 * i))) == 0) {
2978 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2979 ror64(a->VsrD(i), 8) ^
2980 (a->VsrD(i) >> 7);
2981 } else { /* six.bit[2*i] == 1 */
2982 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2983 ror64(a->VsrD(i), 61) ^
2984 (a->VsrD(i) >> 6);
2985 }
2986 } else { /* st == 1 */
2987 if ((six & (0x8 >> (2 * i))) == 0) {
2988 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2989 ror64(a->VsrD(i), 34) ^
2990 ror64(a->VsrD(i), 39);
2991 } else { /* six.bit[2*i] == 1 */
2992 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2993 ror64(a->VsrD(i), 18) ^
2994 ror64(a->VsrD(i), 41);
2995 }
2996 }
2997 }
2998 }
2999
3000 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3001 {
3002 ppc_avr_t result;
3003 int i;
3004
3005 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3006 int indexA = c->VsrB(i) >> 4;
3007 int indexB = c->VsrB(i) & 0xF;
3008
3009 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3010 }
3011 *r = result;
3012 }
3013
3014 #undef VECTOR_FOR_INORDER_I
3015
3016 /*****************************************************************************/
3017 /* SPE extension helpers */
3018 /* Nibble bit-reversal lookup table - used to speed up byte/word reversal */
3019 static const uint8_t hbrev[16] = {
3020 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3021 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3022 };
3023
3024 static inline uint8_t byte_reverse(uint8_t val)
3025 {
3026 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3027 }
3028
3029 static inline uint32_t word_reverse(uint32_t val)
3030 {
3031 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3032 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3033 }
3034
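/*
 * brinc - SPE bit-reversed increment, typically used for FFT-style
 * bit-reversed addressing: the masked low bits of arg1 are bit-reversed,
 * incremented, and reversed back, under the mask supplied in arg2.
 */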
3035 #define MASKBITS 16 /* Arbitrary value - to be fixed (implementation dependent) */
3036 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3037 {
3038 uint32_t a, b, d, mask;
3039
3040 mask = UINT32_MAX >> (32 - MASKBITS);
3041 a = arg1 & mask;
3042 b = arg2 & mask;
3043 d = word_reverse(1 + word_reverse(a | ~b));
3044 return (arg1 & ~mask) | (d & b);
3045 }
3046
3047 uint32_t helper_cntlsw32(uint32_t val)
3048 {
3049 if (val & 0x80000000) {
3050 return clz32(~val);
3051 } else {
3052 return clz32(val);
3053 }
3054 }
3055
3056 uint32_t helper_cntlzw32(uint32_t val)
3057 {
3058 return clz32(val);
3059 }
3060
3061 /* 440 specific */
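/*
 * dlmzb - determine leftmost zero byte: scan the eight-byte string formed
 * by high:low from the most significant byte, store the resulting byte
 * count in the low bits of XER and return it; with update_Rc set, CR0 also
 * records whether a zero byte was found in high, in low, or not at all.
 */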
3062 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3063 target_ulong low, uint32_t update_Rc)
3064 {
3065 target_ulong mask;
3066 int i;
3067
3068 i = 1;
3069 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3070 if ((high & mask) == 0) {
3071 if (update_Rc) {
3072 env->crf[0] = 0x4;
3073 }
3074 goto done;
3075 }
3076 i++;
3077 }
3078 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3079 if ((low & mask) == 0) {
3080 if (update_Rc) {
3081 env->crf[0] = 0x8;
3082 }
3083 goto done;
3084 }
3085 i++;
3086 }
3087 i = 8;
3088 if (update_Rc) {
3089 env->crf[0] = 0x2;
3090 }
3091 done:
3092 env->xer = (env->xer & ~0x7F) | i;
3093 if (update_Rc) {
3094 env->crf[0] |= xer_so;
3095 }
3096 return i;
3097 }