target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
35
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 {
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
42 }
43 }
44
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
47 {
48 uint64_t rt = 0;
49 int overflow = 0;
50
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
53
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
59 }
60
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
63 }
64
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
67 }
68
69 return (target_ulong)rt;
70 }
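/*
 * Example: with ra = 1 and rb = 2 the dividend is 0x100000000, so the
 * quotient 0x80000000 still fits in 32 bits and is returned as-is; with
 * ra = 1 and rb = 1 the quotient would be 0x100000000, which exceeds
 * UINT32_MAX, so the result is undefined (forced to 0 here) and OV is
 * set when oe is requested.
 */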
71
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
74 {
75 int64_t rt = 0;
76 int overflow = 0;
77
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
80
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
87 }
88
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
91 }
92
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
95 }
96
97 return (target_ulong)rt;
98 }
99
100 #if defined(TARGET_PPC64)
101
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 {
104 uint64_t rt = 0;
105 int overflow = 0;
106
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
112 }
113
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
116 }
117
118 return rt;
119 }
120
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 {
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
127
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
133 }
134
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
137 }
138
139 return rt;
140 }
141
142 #endif
143
144
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
148
149 /*
150 * Subtract 1 from each byte, AND the result with the inverse of the
151 * original value, and check whether the MSB is set in each byte.
152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
154 */
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
156
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
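/*
 * Example: hasvalue(rb, 0x2a) XORs 0x2a2a2a2a2a2a2a2a into rb; any byte of
 * rb that was equal to 0x2a becomes zero and is then flagged by haszero(),
 * so helper_cmpeqb() below reports the match with CRF_GT.
 */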
159
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
161 {
162 return hasvalue(rb, ra) ? CRF_GT : 0;
163 }
164
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
168
169 /*
170 * Return a random number.
171 */
172 uint64_t helper_darn32(void)
173 {
174 Error *err = NULL;
175 uint32_t ret;
176
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
182 }
183
184 return ret;
185 }
186
187 uint64_t helper_darn64(void)
188 {
189 Error *err = NULL;
190 uint64_t ret;
191
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
197 }
198
199 return ret;
200 }
201
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
203 {
204 int i;
205 uint64_t ra = 0;
206
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
212 }
213 }
214 }
215 return ra;
216 }
217
218 #endif
219
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
221 {
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
225
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
229 }
230 mask <<= 8;
231 }
232 return ra;
233 }
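/*
 * Each result byte is 0xff where the corresponding bytes of rs and rb are
 * equal and 0x00 where they differ; e.g. if rs and rb differ only in their
 * lowest byte, every result byte is 0xff except the lowest one.
 */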
234
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
238 {
239 int32_t ret;
240
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
249 }
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
253 }
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
257 }
258 return (target_long)ret;
259 }
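/*
 * CA and CA32 are set only when the source value is negative and at least
 * one 1 bit is shifted out, i.e. when the arithmetic shift is inexact.
 */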
260
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
264 {
265 int64_t ret;
266
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
275 }
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
279 }
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
283 }
284 return ret;
285 }
286 #endif
287
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
299 }
300
301 target_ulong helper_popcntw(target_ulong val)
302 {
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
315 }
316 #else
317 target_ulong helper_popcntb(target_ulong val)
318 {
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
324 }
325 #endif
326
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
328 {
329 /*
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
333 * ctz or cto (count trailing ones), we negate the mask at the end of the loop.
334 */
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
338
339 if (mask == 0 || mask == -1) {
340 return src;
341 }
342
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
349 }
350
351 /*
352 * Extracts 'n' trailing bits of 'src' and puts them on the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
355 */
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
361 }
362
363 /*
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
368 */
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
374 }
375
376 /*
377 * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
378 * we'll shift it 64 - ctpop(mask) more times.
379 */
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
384 }
385
386 return left | (right >> n);
387 }
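/*
 * The bits of src selected by 1 bits in mask end up, in order, at the low
 * end of the result, and the bits selected by 0 bits at the high end; e.g.
 * helper_CFUGED(src, 0xffffffff00000000ull) swaps the two 32-bit halves
 * of src.
 */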
388
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
390 {
391 int i, o;
392 uint64_t result = 0;
393
394 if (mask == -1) {
395 return src;
396 }
397
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
402 }
403
404 return result;
405 }
406
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
408 {
409 int i, o;
410 uint64_t result = 0;
411
412 if (mask == -1) {
413 return src;
414 }
415
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
420 }
421
422 return result;
423 }
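/*
 * PDEPD scatters the low-order bits of src into the positions of the 1 bits
 * of mask; PEXTD gathers the bits of src found at those positions back into
 * the low-order bits of the result. For example,
 * helper_PDEPD(0x5, 0xf0) == 0x50 and helper_PEXTD(0x50, 0xf0) == 0x5.
 */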
424
425 /*****************************************************************************/
426 /* PowerPC 601 specific instructions (POWER bridge) */
427 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
428 {
429 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
430
431 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
432 (int32_t)arg2 == 0) {
433 env->spr[SPR_MQ] = 0;
434 return INT32_MIN;
435 } else {
436 env->spr[SPR_MQ] = tmp % arg2;
437 return tmp / (int32_t)arg2;
438 }
439 }
440
441 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
442 target_ulong arg2)
443 {
444 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
445
446 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
447 (int32_t)arg2 == 0) {
448 env->so = env->ov = 1;
449 env->spr[SPR_MQ] = 0;
450 return INT32_MIN;
451 } else {
452 env->spr[SPR_MQ] = tmp % arg2;
453 tmp /= (int32_t)arg2;
454 if ((int32_t)tmp != tmp) {
455 env->so = env->ov = 1;
456 } else {
457 env->ov = 0;
458 }
459 return tmp;
460 }
461 }
462
463 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
464 target_ulong arg2)
465 {
466 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
467 (int32_t)arg2 == 0) {
468 env->spr[SPR_MQ] = 0;
469 return INT32_MIN;
470 } else {
471 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
472 return (int32_t)arg1 / (int32_t)arg2;
473 }
474 }
475
476 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
477 target_ulong arg2)
478 {
479 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
480 (int32_t)arg2 == 0) {
481 env->so = env->ov = 1;
482 env->spr[SPR_MQ] = 0;
483 return INT32_MIN;
484 } else {
485 env->ov = 0;
486 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
487 return (int32_t)arg1 / (int32_t)arg2;
488 }
489 }
490
491 /*****************************************************************************/
492 /* Altivec extension helpers */
493 #if defined(HOST_WORDS_BIGENDIAN)
494 #define VECTOR_FOR_INORDER_I(index, element) \
495 for (index = 0; index < ARRAY_SIZE(r->element); index++)
496 #else
497 #define VECTOR_FOR_INORDER_I(index, element) \
498 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
499 #endif
500
501 /* Saturating arithmetic helpers. */
502 #define SATCVT(from, to, from_type, to_type, min, max) \
503 static inline to_type cvt##from##to(from_type x, int *sat) \
504 { \
505 to_type r; \
506 \
507 if (x < (from_type)min) { \
508 r = min; \
509 *sat = 1; \
510 } else if (x > (from_type)max) { \
511 r = max; \
512 *sat = 1; \
513 } else { \
514 r = x; \
515 } \
516 return r; \
517 }
518 #define SATCVTU(from, to, from_type, to_type, min, max) \
519 static inline to_type cvt##from##to(from_type x, int *sat) \
520 { \
521 to_type r; \
522 \
523 if (x > (from_type)max) { \
524 r = max; \
525 *sat = 1; \
526 } else { \
527 r = x; \
528 } \
529 return r; \
530 }
531 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
532 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
533 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
534
535 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
536 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
537 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
538 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
539 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
540 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
541 #undef SATCVT
542 #undef SATCVTU
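/*
 * The converters generated above are named cvt<from><to>; e.g. cvtsdsw()
 * clamps a signed doubleword to [INT32_MIN, INT32_MAX] and cvtsduw() clamps
 * a signed doubleword to [0, UINT32_MAX], setting *sat whenever clamping
 * occurs.
 */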
543
544 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
545 {
546 ppc_store_vscr(env, vscr);
547 }
548
549 uint32_t helper_mfvscr(CPUPPCState *env)
550 {
551 return ppc_get_vscr(env);
552 }
553
554 static inline void set_vscr_sat(CPUPPCState *env)
555 {
556 /* The choice of non-zero value is arbitrary. */
557 env->vscr_sat.u32[0] = 1;
558 }
559
560 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
561 {
562 int i;
563
564 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
565 r->u32[i] = ~a->u32[i] < b->u32[i];
566 }
567 }
568
569 /* vprtybw */
570 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
571 {
572 int i;
573 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
574 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
575 res ^= res >> 8;
576 r->u32[i] = res & 1;
577 }
578 }
579
580 /* vprtybd */
581 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
582 {
583 int i;
584 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
585 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
586 res ^= res >> 16;
587 res ^= res >> 8;
588 r->u64[i] = res & 1;
589 }
590 }
591
592 /* vprtybq */
593 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
594 {
595 uint64_t res = b->u64[0] ^ b->u64[1];
596 res ^= res >> 32;
597 res ^= res >> 16;
598 res ^= res >> 8;
599 r->VsrD(1) = res & 1;
600 r->VsrD(0) = 0;
601 }
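/*
 * Each vprtyb* helper XOR-folds its element so that bit 0 of the result is
 * the parity of the least-significant bits of the element's bytes; all
 * other result bits are cleared.
 */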
602
603 #define VARITHFP(suffix, func) \
604 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
605 ppc_avr_t *b) \
606 { \
607 int i; \
608 \
609 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
610 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
611 } \
612 }
613 VARITHFP(addfp, float32_add)
614 VARITHFP(subfp, float32_sub)
615 VARITHFP(minfp, float32_min)
616 VARITHFP(maxfp, float32_max)
617 #undef VARITHFP
618
619 #define VARITHFPFMA(suffix, type) \
620 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
621 ppc_avr_t *b, ppc_avr_t *c) \
622 { \
623 int i; \
624 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
625 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
626 type, &env->vec_status); \
627 } \
628 }
629 VARITHFPFMA(maddfp, 0);
630 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
631 #undef VARITHFPFMA
632
633 #define VARITHSAT_CASE(type, op, cvt, element) \
634 { \
635 type result = (type)a->element[i] op (type)b->element[i]; \
636 r->element[i] = cvt(result, &sat); \
637 }
638
639 #define VARITHSAT_DO(name, op, optype, cvt, element) \
640 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
641 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
642 { \
643 int sat = 0; \
644 int i; \
645 \
646 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
647 VARITHSAT_CASE(optype, op, cvt, element); \
648 } \
649 if (sat) { \
650 vscr_sat->u32[0] = 1; \
651 } \
652 }
653 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
654 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
655 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
656 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
657 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
658 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
659 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
660 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
661 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
662 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
663 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
664 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
665 #undef VARITHSAT_CASE
666 #undef VARITHSAT_DO
667 #undef VARITHSAT_SIGNED
668 #undef VARITHSAT_UNSIGNED
669
670 #define VAVG_DO(name, element, etype) \
671 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
672 { \
673 int i; \
674 \
675 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
676 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
677 r->element[i] = x >> 1; \
678 } \
679 }
680
681 #define VAVG(type, signed_element, signed_type, unsigned_element, \
682 unsigned_type) \
683 VAVG_DO(avgs##type, signed_element, signed_type) \
684 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
685 VAVG(b, s8, int16_t, u8, uint16_t)
686 VAVG(h, s16, int32_t, u16, uint32_t)
687 VAVG(w, s32, int64_t, u32, uint64_t)
688 #undef VAVG_DO
689 #undef VAVG
690
691 #define VABSDU_DO(name, element) \
692 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
693 { \
694 int i; \
695 \
696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
697 r->element[i] = (a->element[i] > b->element[i]) ? \
698 (a->element[i] - b->element[i]) : \
699 (b->element[i] - a->element[i]); \
700 } \
701 }
702
703 /*
704 * VABSDU - Vector absolute difference unsigned
705 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
706 * element - element type to access from vector
707 */
708 #define VABSDU(type, element) \
709 VABSDU_DO(absdu##type, element)
710 VABSDU(b, u8)
711 VABSDU(h, u16)
712 VABSDU(w, u32)
713 #undef VABSDU_DO
714 #undef VABSDU
715
716 #define VCF(suffix, cvt, element) \
717 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
718 ppc_avr_t *b, uint32_t uim) \
719 { \
720 int i; \
721 \
722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
723 float32 t = cvt(b->element[i], &env->vec_status); \
724 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
725 } \
726 }
727 VCF(ux, uint32_to_float32, u32)
728 VCF(sx, int32_to_float32, s32)
729 #undef VCF
730
731 #define VCMP_DO(suffix, compare, element, record) \
732 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
733 ppc_avr_t *a, ppc_avr_t *b) \
734 { \
735 uint64_t ones = (uint64_t)-1; \
736 uint64_t all = ones; \
737 uint64_t none = 0; \
738 int i; \
739 \
740 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
741 uint64_t result = (a->element[i] compare b->element[i] ? \
742 ones : 0x0); \
743 switch (sizeof(a->element[0])) { \
744 case 8: \
745 r->u64[i] = result; \
746 break; \
747 case 4: \
748 r->u32[i] = result; \
749 break; \
750 case 2: \
751 r->u16[i] = result; \
752 break; \
753 case 1: \
754 r->u8[i] = result; \
755 break; \
756 } \
757 all &= result; \
758 none |= result; \
759 } \
760 if (record) { \
761 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
762 } \
763 }
764 #define VCMP(suffix, compare, element) \
765 VCMP_DO(suffix, compare, element, 0) \
766 VCMP_DO(suffix##_dot, compare, element, 1)
767 VCMP(equb, ==, u8)
768 VCMP(equh, ==, u16)
769 VCMP(equw, ==, u32)
770 VCMP(equd, ==, u64)
771 VCMP(gtub, >, u8)
772 VCMP(gtuh, >, u16)
773 VCMP(gtuw, >, u32)
774 VCMP(gtud, >, u64)
775 VCMP(gtsb, >, s8)
776 VCMP(gtsh, >, s16)
777 VCMP(gtsw, >, s32)
778 VCMP(gtsd, >, s64)
779 #undef VCMP_DO
780 #undef VCMP
781
782 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
783 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
784 ppc_avr_t *a, ppc_avr_t *b) \
785 { \
786 etype ones = (etype)-1; \
787 etype all = ones; \
788 etype result, none = 0; \
789 int i; \
790 \
791 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
792 if (cmpzero) { \
793 result = ((a->element[i] == 0) \
794 || (b->element[i] == 0) \
795 || (a->element[i] != b->element[i]) ? \
796 ones : 0x0); \
797 } else { \
798 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
799 } \
800 r->element[i] = result; \
801 all &= result; \
802 none |= result; \
803 } \
804 if (record) { \
805 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
806 } \
807 }
808
809 /*
810 * VCMPNEZ - Vector compare not equal to zero
811 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
812 * element - element type to access from vector
813 */
814 #define VCMPNE(suffix, element, etype, cmpzero) \
815 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
816 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
817 VCMPNE(zb, u8, uint8_t, 1)
818 VCMPNE(zh, u16, uint16_t, 1)
819 VCMPNE(zw, u32, uint32_t, 1)
820 VCMPNE(b, u8, uint8_t, 0)
821 VCMPNE(h, u16, uint16_t, 0)
822 VCMPNE(w, u32, uint32_t, 0)
823 #undef VCMPNE_DO
824 #undef VCMPNE
825
826 #define VCMPFP_DO(suffix, compare, order, record) \
827 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
828 ppc_avr_t *a, ppc_avr_t *b) \
829 { \
830 uint32_t ones = (uint32_t)-1; \
831 uint32_t all = ones; \
832 uint32_t none = 0; \
833 int i; \
834 \
835 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
836 uint32_t result; \
837 FloatRelation rel = \
838 float32_compare_quiet(a->f32[i], b->f32[i], \
839 &env->vec_status); \
840 if (rel == float_relation_unordered) { \
841 result = 0; \
842 } else if (rel compare order) { \
843 result = ones; \
844 } else { \
845 result = 0; \
846 } \
847 r->u32[i] = result; \
848 all &= result; \
849 none |= result; \
850 } \
851 if (record) { \
852 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
853 } \
854 }
855 #define VCMPFP(suffix, compare, order) \
856 VCMPFP_DO(suffix, compare, order, 0) \
857 VCMPFP_DO(suffix##_dot, compare, order, 1)
858 VCMPFP(eqfp, ==, float_relation_equal)
859 VCMPFP(gefp, !=, float_relation_less)
860 VCMPFP(gtfp, ==, float_relation_greater)
861 #undef VCMPFP_DO
862 #undef VCMPFP
863
864 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
865 ppc_avr_t *a, ppc_avr_t *b, int record)
866 {
867 int i;
868 int all_in = 0;
869
870 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
871 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
872 &env->vec_status);
873 if (le_rel == float_relation_unordered) {
874 r->u32[i] = 0xc0000000;
875 all_in = 1;
876 } else {
877 float32 bneg = float32_chs(b->f32[i]);
878 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
879 &env->vec_status);
880 int le = le_rel != float_relation_greater;
881 int ge = ge_rel != float_relation_less;
882
883 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
884 all_in |= (!le | !ge);
885 }
886 }
887 if (record) {
888 env->crf[6] = (all_in == 0) << 1;
889 }
890 }
891
892 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
893 {
894 vcmpbfp_internal(env, r, a, b, 0);
895 }
896
897 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
898 ppc_avr_t *b)
899 {
900 vcmpbfp_internal(env, r, a, b, 1);
901 }
902
903 #define VCT(suffix, satcvt, element) \
904 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
905 ppc_avr_t *b, uint32_t uim) \
906 { \
907 int i; \
908 int sat = 0; \
909 float_status s = env->vec_status; \
910 \
911 set_float_rounding_mode(float_round_to_zero, &s); \
912 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
913 if (float32_is_any_nan(b->f32[i])) { \
914 r->element[i] = 0; \
915 } else { \
916 float64 t = float32_to_float64(b->f32[i], &s); \
917 int64_t j; \
918 \
919 t = float64_scalbn(t, uim, &s); \
920 j = float64_to_int64(t, &s); \
921 r->element[i] = satcvt(j, &sat); \
922 } \
923 } \
924 if (sat) { \
925 set_vscr_sat(env); \
926 } \
927 }
928 VCT(uxs, cvtsduw, u32)
929 VCT(sxs, cvtsdsw, s32)
930 #undef VCT
931
932 target_ulong helper_vclzlsbb(ppc_avr_t *r)
933 {
934 target_ulong count = 0;
935 int i;
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 if (r->VsrB(i) & 0x01) {
938 break;
939 }
940 count++;
941 }
942 return count;
943 }
944
945 target_ulong helper_vctzlsbb(ppc_avr_t *r)
946 {
947 target_ulong count = 0;
948 int i;
949 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
950 if (r->VsrB(i) & 0x01) {
951 break;
952 }
953 count++;
954 }
955 return count;
956 }
957
958 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
959 ppc_avr_t *b, ppc_avr_t *c)
960 {
961 int sat = 0;
962 int i;
963
964 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
965 int32_t prod = a->s16[i] * b->s16[i];
966 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
967
968 r->s16[i] = cvtswsh(t, &sat);
969 }
970
971 if (sat) {
972 set_vscr_sat(env);
973 }
974 }
975
976 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
977 ppc_avr_t *b, ppc_avr_t *c)
978 {
979 int sat = 0;
980 int i;
981
982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
983 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
984 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
985 r->s16[i] = cvtswsh(t, &sat);
986 }
987
988 if (sat) {
989 set_vscr_sat(env);
990 }
991 }
992
993 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
994 {
995 int i;
996
997 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
998 int32_t prod = a->s16[i] * b->s16[i];
999 r->s16[i] = (int16_t) (prod + c->s16[i]);
1000 }
1001 }
1002
1003 #define VMRG_DO(name, element, access, ofs) \
1004 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1005 { \
1006 ppc_avr_t result; \
1007 int i, half = ARRAY_SIZE(r->element) / 2; \
1008 \
1009 for (i = 0; i < half; i++) { \
1010 result.access(i * 2 + 0) = a->access(i + ofs); \
1011 result.access(i * 2 + 1) = b->access(i + ofs); \
1012 } \
1013 *r = result; \
1014 }
1015
1016 #define VMRG(suffix, element, access) \
1017 VMRG_DO(mrgl##suffix, element, access, half) \
1018 VMRG_DO(mrgh##suffix, element, access, 0)
1019 VMRG(b, u8, VsrB)
1020 VMRG(h, u16, VsrH)
1021 VMRG(w, u32, VsrW)
1022 #undef VMRG_DO
1023 #undef VMRG
1024
1025 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1026 ppc_avr_t *b, ppc_avr_t *c)
1027 {
1028 int32_t prod[16];
1029 int i;
1030
1031 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1032 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1033 }
1034
1035 VECTOR_FOR_INORDER_I(i, s32) {
1036 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1037 prod[4 * i + 2] + prod[4 * i + 3];
1038 }
1039 }
1040
1041 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1042 ppc_avr_t *b, ppc_avr_t *c)
1043 {
1044 int32_t prod[8];
1045 int i;
1046
1047 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1048 prod[i] = a->s16[i] * b->s16[i];
1049 }
1050
1051 VECTOR_FOR_INORDER_I(i, s32) {
1052 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1053 }
1054 }
1055
1056 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1057 ppc_avr_t *b, ppc_avr_t *c)
1058 {
1059 int32_t prod[8];
1060 int i;
1061 int sat = 0;
1062
1063 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1064 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1065 }
1066
1067 VECTOR_FOR_INORDER_I(i, s32) {
1068 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1069
1070 r->u32[i] = cvtsdsw(t, &sat);
1071 }
1072
1073 if (sat) {
1074 set_vscr_sat(env);
1075 }
1076 }
1077
1078 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1079 ppc_avr_t *b, ppc_avr_t *c)
1080 {
1081 uint16_t prod[16];
1082 int i;
1083
1084 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1085 prod[i] = a->u8[i] * b->u8[i];
1086 }
1087
1088 VECTOR_FOR_INORDER_I(i, u32) {
1089 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1090 prod[4 * i + 2] + prod[4 * i + 3];
1091 }
1092 }
1093
1094 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1095 ppc_avr_t *b, ppc_avr_t *c)
1096 {
1097 uint32_t prod[8];
1098 int i;
1099
1100 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1101 prod[i] = a->u16[i] * b->u16[i];
1102 }
1103
1104 VECTOR_FOR_INORDER_I(i, u32) {
1105 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1106 }
1107 }
1108
1109 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1110 ppc_avr_t *b, ppc_avr_t *c)
1111 {
1112 uint32_t prod[8];
1113 int i;
1114 int sat = 0;
1115
1116 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1117 prod[i] = a->u16[i] * b->u16[i];
1118 }
1119
1120 VECTOR_FOR_INORDER_I(i, s32) {
1121 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1122
1123 r->u32[i] = cvtuduw(t, &sat);
1124 }
1125
1126 if (sat) {
1127 set_vscr_sat(env);
1128 }
1129 }
1130
1131 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1132 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1133 { \
1134 int i; \
1135 \
1136 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1137 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1138 (cast)b->mul_access(i); \
1139 } \
1140 }
1141
1142 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1143 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1144 { \
1145 int i; \
1146 \
1147 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1148 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1149 (cast)b->mul_access(i + 1); \
1150 } \
1151 }
1152
1153 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1154 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1155 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1156 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1157 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1158 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1159 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1160 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1161 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1162 #undef VMUL_DO_EVN
1163 #undef VMUL_DO_ODD
1164 #undef VMUL
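/*
 * The multiply-even (mule) and multiply-odd (mulo) helpers above are
 * widening multiplies: each even- or odd-numbered pair of source elements,
 * in Vsr element order, is multiplied into a destination element of twice
 * the width.
 */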
1165
1166 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1167 {
1168 int i;
1169
1170 for (i = 0; i < 4; i++) {
1171 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
1172 }
1173 }
1174
1175 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1176 {
1177 int i;
1178
1179 for (i = 0; i < 4; i++) {
1180 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
1181 (uint64_t)b->u32[i]) >> 32);
1182 }
1183 }
1184
1185 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1186 {
1187 uint64_t discard;
1188
1189 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
1190 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
1191 }
1192
1193 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1194 {
1195 uint64_t discard;
1196
1197 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
1198 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
1199 }
1200
1201 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1202 ppc_avr_t *c)
1203 {
1204 ppc_avr_t result;
1205 int i;
1206
1207 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1208 int s = c->VsrB(i) & 0x1f;
1209 int index = s & 0xf;
1210
1211 if (s & 0x10) {
1212 result.VsrB(i) = b->VsrB(index);
1213 } else {
1214 result.VsrB(i) = a->VsrB(index);
1215 }
1216 }
1217 *r = result;
1218 }
1219
1220 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1221 ppc_avr_t *c)
1222 {
1223 ppc_avr_t result;
1224 int i;
1225
1226 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1227 int s = c->VsrB(i) & 0x1f;
1228 int index = 15 - (s & 0xf);
1229
1230 if (s & 0x10) {
1231 result.VsrB(i) = a->VsrB(index);
1232 } else {
1233 result.VsrB(i) = b->VsrB(index);
1234 }
1235 }
1236 *r = result;
1237 }
1238
1239 #if defined(HOST_WORDS_BIGENDIAN)
1240 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1241 #define VBPERMD_INDEX(i) (i)
1242 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1243 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1244 #else
1245 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1246 #define VBPERMD_INDEX(i) (1 - i)
1247 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1248 #define EXTRACT_BIT(avr, i, index) \
1249 (extract64((avr)->u64[1 - i], 63 - index, 1))
1250 #endif
1251
1252 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1253 {
1254 int i, j;
1255 ppc_avr_t result = { .u64 = { 0, 0 } };
1256 VECTOR_FOR_INORDER_I(i, u64) {
1257 for (j = 0; j < 8; j++) {
1258 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1259 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1260 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1261 }
1262 }
1263 }
1264 *r = result;
1265 }
1266
1267 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1268 {
1269 int i;
1270 uint64_t perm = 0;
1271
1272 VECTOR_FOR_INORDER_I(i, u8) {
1273 int index = VBPERMQ_INDEX(b, i);
1274
1275 if (index < 128) {
1276 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1277 if (a->u64[VBPERMQ_DW(index)] & mask) {
1278 perm |= (0x8000 >> i);
1279 }
1280 }
1281 }
1282
1283 r->VsrD(0) = perm;
1284 r->VsrD(1) = 0;
1285 }
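/*
 * Each of the 16 index bytes in b selects one bit of the 128-bit value in a
 * (indices >= 128 select 0); the selected bits are packed into the low 16
 * bits of the most-significant doubleword of the result, and the rest of
 * the result is cleared.
 */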
1286
1287 #undef VBPERMQ_INDEX
1288 #undef VBPERMQ_DW
1289
1290 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1291 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1292 { \
1293 int i, j; \
1294 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1295 \
1296 VECTOR_FOR_INORDER_I(i, srcfld) { \
1297 prod[i] = 0; \
1298 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1299 if (a->srcfld[i] & (1ull << j)) { \
1300 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1301 } \
1302 } \
1303 } \
1304 \
1305 VECTOR_FOR_INORDER_I(i, trgfld) { \
1306 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1307 } \
1308 }
1309
1310 PMSUM(vpmsumb, u8, u16, uint16_t)
1311 PMSUM(vpmsumh, u16, u32, uint32_t)
1312 PMSUM(vpmsumw, u32, u64, uint64_t)
1313
1314 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1315 {
1316
1317 #ifdef CONFIG_INT128
1318 int i, j;
1319 __uint128_t prod[2];
1320
1321 VECTOR_FOR_INORDER_I(i, u64) {
1322 prod[i] = 0;
1323 for (j = 0; j < 64; j++) {
1324 if (a->u64[i] & (1ull << j)) {
1325 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1326 }
1327 }
1328 }
1329
1330 r->u128 = prod[0] ^ prod[1];
1331
1332 #else
1333 int i, j;
1334 ppc_avr_t prod[2];
1335
1336 VECTOR_FOR_INORDER_I(i, u64) {
1337 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1338 for (j = 0; j < 64; j++) {
1339 if (a->u64[i] & (1ull << j)) {
1340 ppc_avr_t bshift;
1341 if (j == 0) {
1342 bshift.VsrD(0) = 0;
1343 bshift.VsrD(1) = b->u64[i];
1344 } else {
1345 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1346 bshift.VsrD(1) = b->u64[i] << j;
1347 }
1348 prod[i].VsrD(1) ^= bshift.VsrD(1);
1349 prod[i].VsrD(0) ^= bshift.VsrD(0);
1350 }
1351 }
1352 }
1353
1354 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1355 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1356 #endif
1357 }
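/*
 * Carry-less (GF(2) polynomial) multiply: each 64-bit element of a is
 * multiplied by the corresponding element of b using XOR instead of
 * addition, and the two 128-bit partial products are XORed into the
 * 128-bit result.
 */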
1358
1359
1360 #if defined(HOST_WORDS_BIGENDIAN)
1361 #define PKBIG 1
1362 #else
1363 #define PKBIG 0
1364 #endif
1365 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1366 {
1367 int i, j;
1368 ppc_avr_t result;
1369 #if defined(HOST_WORDS_BIGENDIAN)
1370 const ppc_avr_t *x[2] = { a, b };
1371 #else
1372 const ppc_avr_t *x[2] = { b, a };
1373 #endif
1374
1375 VECTOR_FOR_INORDER_I(i, u64) {
1376 VECTOR_FOR_INORDER_I(j, u32) {
1377 uint32_t e = x[i]->u32[j];
1378
1379 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1380 ((e >> 6) & 0x3e0) |
1381 ((e >> 3) & 0x1f));
1382 }
1383 }
1384 *r = result;
1385 }
1386
1387 #define VPK(suffix, from, to, cvt, dosat) \
1388 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1389 ppc_avr_t *a, ppc_avr_t *b) \
1390 { \
1391 int i; \
1392 int sat = 0; \
1393 ppc_avr_t result; \
1394 ppc_avr_t *a0 = PKBIG ? a : b; \
1395 ppc_avr_t *a1 = PKBIG ? b : a; \
1396 \
1397 VECTOR_FOR_INORDER_I(i, from) { \
1398 result.to[i] = cvt(a0->from[i], &sat); \
1399 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1400 } \
1401 *r = result; \
1402 if (dosat && sat) { \
1403 set_vscr_sat(env); \
1404 } \
1405 }
1406 #define I(x, y) (x)
1407 VPK(shss, s16, s8, cvtshsb, 1)
1408 VPK(shus, s16, u8, cvtshub, 1)
1409 VPK(swss, s32, s16, cvtswsh, 1)
1410 VPK(swus, s32, u16, cvtswuh, 1)
1411 VPK(sdss, s64, s32, cvtsdsw, 1)
1412 VPK(sdus, s64, u32, cvtsduw, 1)
1413 VPK(uhus, u16, u8, cvtuhub, 1)
1414 VPK(uwus, u32, u16, cvtuwuh, 1)
1415 VPK(udus, u64, u32, cvtuduw, 1)
1416 VPK(uhum, u16, u8, I, 0)
1417 VPK(uwum, u32, u16, I, 0)
1418 VPK(udum, u64, u32, I, 0)
1419 #undef I
1420 #undef VPK
1421 #undef PKBIG
1422
1423 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1424 {
1425 int i;
1426
1427 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1428 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1429 }
1430 }
1431
1432 #define VRFI(suffix, rounding) \
1433 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1434 ppc_avr_t *b) \
1435 { \
1436 int i; \
1437 float_status s = env->vec_status; \
1438 \
1439 set_float_rounding_mode(rounding, &s); \
1440 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1441 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1442 } \
1443 }
1444 VRFI(n, float_round_nearest_even)
1445 VRFI(m, float_round_down)
1446 VRFI(p, float_round_up)
1447 VRFI(z, float_round_to_zero)
1448 #undef VRFI
1449
1450 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1451 {
1452 int i;
1453
1454 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1455 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1456
1457 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1458 }
1459 }
1460
1461 #define VRLMI(name, size, element, insert) \
1462 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1463 { \
1464 int i; \
1465 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1466 uint##size##_t src1 = a->element[i]; \
1467 uint##size##_t src2 = b->element[i]; \
1468 uint##size##_t src3 = r->element[i]; \
1469 uint##size##_t begin, end, shift, mask, rot_val; \
1470 \
1471 shift = extract##size(src2, 0, 6); \
1472 end = extract##size(src2, 8, 6); \
1473 begin = extract##size(src2, 16, 6); \
1474 rot_val = rol##size(src1, shift); \
1475 mask = mask_u##size(begin, end); \
1476 if (insert) { \
1477 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1478 } else { \
1479 r->element[i] = (rot_val & mask); \
1480 } \
1481 } \
1482 }
1483
1484 VRLMI(vrldmi, 64, u64, 1);
1485 VRLMI(vrlwmi, 32, u32, 1);
1486 VRLMI(vrldnm, 64, u64, 0);
1487 VRLMI(vrlwnm, 32, u32, 0);
1488
1489 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1490 ppc_avr_t *c)
1491 {
1492 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1493 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1494 }
1495
1496 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1497 {
1498 int i;
1499
1500 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1501 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1502 }
1503 }
1504
1505 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1506 {
1507 int i;
1508
1509 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1510 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1511 }
1512 }
1513
1514 #define VEXTU_X_DO(name, size, left) \
1515 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1516 { \
1517 int index = (a & 0xf) * 8; \
1518 if (left) { \
1519 index = 128 - index - size; \
1520 } \
1521 return int128_getlo(int128_rshift(b->s128, index)) & \
1522 MAKE_64BIT_MASK(0, size); \
1523 }
1524 VEXTU_X_DO(vextublx, 8, 1)
1525 VEXTU_X_DO(vextuhlx, 16, 1)
1526 VEXTU_X_DO(vextuwlx, 32, 1)
1527 VEXTU_X_DO(vextubrx, 8, 0)
1528 VEXTU_X_DO(vextuhrx, 16, 0)
1529 VEXTU_X_DO(vextuwrx, 32, 0)
1530 #undef VEXTU_X_DO
1531
1532 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1533 {
1534 int i;
1535 unsigned int shift, bytes, size;
1536
1537 size = ARRAY_SIZE(r->u8);
1538 for (i = 0; i < size; i++) {
1539 shift = b->VsrB(i) & 0x7; /* extract shift value */
1540 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1541 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1542 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1543 }
1544 }
1545
1546 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1547 {
1548 int i;
1549 unsigned int shift, bytes;
1550
1551 /*
1552 * Use reverse order, as the destination and source registers can be
1553 * the same. The vector is modified in place, saving a temporary;
1554 * reverse order guarantees that a computed result is not fed back.
1555 */
1556 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1557 shift = b->VsrB(i) & 0x7; /* extract shift value */
1558 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1559 /* extract adjacent bytes */
1560 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1561 }
1562 }
1563
1564 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1565 {
1566 int sh = shift & 0xf;
1567 int i;
1568 ppc_avr_t result;
1569
1570 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1571 int index = sh + i;
1572 if (index > 0xf) {
1573 result.VsrB(i) = b->VsrB(index - 0x10);
1574 } else {
1575 result.VsrB(i) = a->VsrB(index);
1576 }
1577 }
1578 *r = result;
1579 }
1580
1581 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1582 {
1583 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1584
1585 #if defined(HOST_WORDS_BIGENDIAN)
1586 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1587 memset(&r->u8[16 - sh], 0, sh);
1588 #else
1589 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1590 memset(&r->u8[0], 0, sh);
1591 #endif
1592 }
1593
1594 #if defined(HOST_WORDS_BIGENDIAN)
1595 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1596 #else
1597 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1598 #endif
1599
1600 #define VINSX(SUFFIX, TYPE) \
1601 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1602 uint64_t val, target_ulong index) \
1603 { \
1604 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1605 target_long idx = index; \
1606 \
1607 if (idx < 0 || idx > maxidx) { \
1608 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1609 qemu_log_mask(LOG_GUEST_ERROR, \
1610 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1611 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1612 } else { \
1613 TYPE src = val; \
1614 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1615 } \
1616 }
1617 VINSX(B, uint8_t)
1618 VINSX(H, uint16_t)
1619 VINSX(W, uint32_t)
1620 VINSX(D, uint64_t)
1621 #undef ELEM_ADDR
1622 #undef VINSX
1623 #if defined(HOST_WORDS_BIGENDIAN)
1624 #define VEXTDVLX(NAME, SIZE) \
1625 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1626 target_ulong index) \
1627 { \
1628 const target_long idx = index; \
1629 ppc_avr_t tmp[2] = { *a, *b }; \
1630 memset(t, 0, sizeof(*t)); \
1631 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1632 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1633 } else { \
1634 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1635 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1636 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1637 } \
1638 }
1639 #else
1640 #define VEXTDVLX(NAME, SIZE) \
1641 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1642 target_ulong index) \
1643 { \
1644 const target_long idx = index; \
1645 ppc_avr_t tmp[2] = { *b, *a }; \
1646 memset(t, 0, sizeof(*t)); \
1647 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1648 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1649 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1650 } else { \
1651 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1652 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1653 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1654 } \
1655 }
1656 #endif
1657 VEXTDVLX(VEXTDUBVLX, 1)
1658 VEXTDVLX(VEXTDUHVLX, 2)
1659 VEXTDVLX(VEXTDUWVLX, 4)
1660 VEXTDVLX(VEXTDDVLX, 8)
1661 #undef VEXTDVLX
1662 #if defined(HOST_WORDS_BIGENDIAN)
1663 #define VEXTRACT(suffix, element) \
1664 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1665 { \
1666 uint32_t es = sizeof(r->element[0]); \
1667 memmove(&r->u8[8 - es], &b->u8[index], es); \
1668 memset(&r->u8[8], 0, 8); \
1669 memset(&r->u8[0], 0, 8 - es); \
1670 }
1671 #else
1672 #define VEXTRACT(suffix, element) \
1673 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1674 { \
1675 uint32_t es = sizeof(r->element[0]); \
1676 uint32_t s = (16 - index) - es; \
1677 memmove(&r->u8[8], &b->u8[s], es); \
1678 memset(&r->u8[0], 0, 8); \
1679 memset(&r->u8[8 + es], 0, 8 - es); \
1680 }
1681 #endif
1682 VEXTRACT(ub, u8)
1683 VEXTRACT(uh, u16)
1684 VEXTRACT(uw, u32)
1685 VEXTRACT(d, u64)
1686 #undef VEXTRACT
1687
1688 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1689 ppc_vsr_t *xb, uint32_t index)
1690 {
1691 ppc_vsr_t t = { };
1692 size_t es = sizeof(uint32_t);
1693 uint32_t ext_index;
1694 int i;
1695
1696 ext_index = index;
1697 for (i = 0; i < es; i++, ext_index++) {
1698 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1699 }
1700
1701 *xt = t;
1702 }
1703
1704 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1705 ppc_vsr_t *xb, uint32_t index)
1706 {
1707 ppc_vsr_t t = *xt;
1708 size_t es = sizeof(uint32_t);
1709 int ins_index, i = 0;
1710
1711 ins_index = index;
1712 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1713 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1714 }
1715
1716 *xt = t;
1717 }
1718
1719 #define XXBLEND(name, sz) \
1720 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1721 ppc_avr_t *c, uint32_t desc) \
1722 { \
1723 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1724 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1725 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1726 } \
1727 }
1728 XXBLEND(B, 8)
1729 XXBLEND(H, 16)
1730 XXBLEND(W, 32)
1731 XXBLEND(D, 64)
1732 #undef XXBLEND
1733
1734 #define VEXT_SIGNED(name, element, cast) \
1735 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1736 { \
1737 int i; \
1738 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1739 r->element[i] = (cast)b->element[i]; \
1740 } \
1741 }
1742 VEXT_SIGNED(vextsb2w, s32, int8_t)
1743 VEXT_SIGNED(vextsb2d, s64, int8_t)
1744 VEXT_SIGNED(vextsh2w, s32, int16_t)
1745 VEXT_SIGNED(vextsh2d, s64, int16_t)
1746 VEXT_SIGNED(vextsw2d, s64, int32_t)
1747 #undef VEXT_SIGNED
1748
1749 #define VNEG(name, element) \
1750 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1751 { \
1752 int i; \
1753 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1754 r->element[i] = -b->element[i]; \
1755 } \
1756 }
1757 VNEG(vnegw, s32)
1758 VNEG(vnegd, s64)
1759 #undef VNEG
1760
1761 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1762 {
1763 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1764
1765 #if defined(HOST_WORDS_BIGENDIAN)
1766 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1767 memset(&r->u8[0], 0, sh);
1768 #else
1769 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1770 memset(&r->u8[16 - sh], 0, sh);
1771 #endif
1772 }
1773
1774 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1775 {
1776 int i;
1777
1778 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1779 r->u32[i] = a->u32[i] >= b->u32[i];
1780 }
1781 }
1782
1783 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1784 {
1785 int64_t t;
1786 int i, upper;
1787 ppc_avr_t result;
1788 int sat = 0;
1789
1790 upper = ARRAY_SIZE(r->s32) - 1;
1791 t = (int64_t)b->VsrSW(upper);
1792 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1793 t += a->VsrSW(i);
1794 result.VsrSW(i) = 0;
1795 }
1796 result.VsrSW(upper) = cvtsdsw(t, &sat);
1797 *r = result;
1798
1799 if (sat) {
1800 set_vscr_sat(env);
1801 }
1802 }
1803
1804 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1805 {
1806 int i, j, upper;
1807 ppc_avr_t result;
1808 int sat = 0;
1809
1810 upper = 1;
1811 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1812 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1813
1814 result.VsrD(i) = 0;
1815 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1816 t += a->VsrSW(2 * i + j);
1817 }
1818 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1819 }
1820
1821 *r = result;
1822 if (sat) {
1823 set_vscr_sat(env);
1824 }
1825 }
1826
1827 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1828 {
1829 int i, j;
1830 int sat = 0;
1831
1832 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1833 int64_t t = (int64_t)b->s32[i];
1834
1835 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1836 t += a->s8[4 * i + j];
1837 }
1838 r->s32[i] = cvtsdsw(t, &sat);
1839 }
1840
1841 if (sat) {
1842 set_vscr_sat(env);
1843 }
1844 }
1845
1846 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1847 {
1848 int sat = 0;
1849 int i;
1850
1851 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1852 int64_t t = (int64_t)b->s32[i];
1853
1854 t += a->s16[2 * i] + a->s16[2 * i + 1];
1855 r->s32[i] = cvtsdsw(t, &sat);
1856 }
1857
1858 if (sat) {
1859 set_vscr_sat(env);
1860 }
1861 }
1862
1863 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1864 {
1865 int i, j;
1866 int sat = 0;
1867
1868 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1869 uint64_t t = (uint64_t)b->u32[i];
1870
1871 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1872 t += a->u8[4 * i + j];
1873 }
1874 r->u32[i] = cvtuduw(t, &sat);
1875 }
1876
1877 if (sat) {
1878 set_vscr_sat(env);
1879 }
1880 }
1881
1882 #if defined(HOST_WORDS_BIGENDIAN)
1883 #define UPKHI 1
1884 #define UPKLO 0
1885 #else
1886 #define UPKHI 0
1887 #define UPKLO 1
1888 #endif
1889 #define VUPKPX(suffix, hi) \
1890 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1891 { \
1892 int i; \
1893 ppc_avr_t result; \
1894 \
1895 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1896 uint16_t e = b->u16[hi ? i : i + 4]; \
1897 uint8_t a = (e >> 15) ? 0xff : 0; \
1898 uint8_t r = (e >> 10) & 0x1f; \
1899 uint8_t g = (e >> 5) & 0x1f; \
1900 uint8_t b = e & 0x1f; \
1901 \
1902 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1903 } \
1904 *r = result; \
1905 }
1906 VUPKPX(lpx, UPKLO)
1907 VUPKPX(hpx, UPKHI)
1908 #undef VUPKPX
1909
1910 #define VUPK(suffix, unpacked, packee, hi) \
1911 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1912 { \
1913 int i; \
1914 ppc_avr_t result; \
1915 \
1916 if (hi) { \
1917 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1918 result.unpacked[i] = b->packee[i]; \
1919 } \
1920 } else { \
1921 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1922 i++) { \
1923 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1924 } \
1925 } \
1926 *r = result; \
1927 }
1928 VUPK(hsb, s16, s8, UPKHI)
1929 VUPK(hsh, s32, s16, UPKHI)
1930 VUPK(hsw, s64, s32, UPKHI)
1931 VUPK(lsb, s16, s8, UPKLO)
1932 VUPK(lsh, s32, s16, UPKLO)
1933 VUPK(lsw, s64, s32, UPKLO)
1934 #undef VUPK
1935 #undef UPKHI
1936 #undef UPKLO
1937
1938 #define VGENERIC_DO(name, element) \
1939 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1940 { \
1941 int i; \
1942 \
1943 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1944 r->element[i] = name(b->element[i]); \
1945 } \
1946 }
1947
1948 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1949 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1950
1951 VGENERIC_DO(clzb, u8)
1952 VGENERIC_DO(clzh, u16)
1953
1954 #undef clzb
1955 #undef clzh
1956
1957 #define ctzb(v) ((v) ? ctz32(v) : 8)
1958 #define ctzh(v) ((v) ? ctz32(v) : 16)
1959 #define ctzw(v) ctz32((v))
1960 #define ctzd(v) ctz64((v))
1961
1962 VGENERIC_DO(ctzb, u8)
1963 VGENERIC_DO(ctzh, u16)
1964 VGENERIC_DO(ctzw, u32)
1965 VGENERIC_DO(ctzd, u64)
1966
1967 #undef ctzb
1968 #undef ctzh
1969 #undef ctzw
1970 #undef ctzd
1971
1972 #define popcntb(v) ctpop8(v)
1973 #define popcnth(v) ctpop16(v)
1974 #define popcntw(v) ctpop32(v)
1975 #define popcntd(v) ctpop64(v)
1976
1977 VGENERIC_DO(popcntb, u8)
1978 VGENERIC_DO(popcnth, u16)
1979 VGENERIC_DO(popcntw, u32)
1980 VGENERIC_DO(popcntd, u64)
1981
1982 #undef popcntb
1983 #undef popcnth
1984 #undef popcntw
1985 #undef popcntd
1986
1987 #undef VGENERIC_DO
1988
1989 #if defined(HOST_WORDS_BIGENDIAN)
1990 #define QW_ONE { .u64 = { 0, 1 } }
1991 #else
1992 #define QW_ONE { .u64 = { 1, 0 } }
1993 #endif
1994
1995 #ifndef CONFIG_INT128
1996
1997 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1998 {
1999 t->u64[0] = ~a.u64[0];
2000 t->u64[1] = ~a.u64[1];
2001 }
2002
2003 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2004 {
2005 if (a.VsrD(0) < b.VsrD(0)) {
2006 return -1;
2007 } else if (a.VsrD(0) > b.VsrD(0)) {
2008 return 1;
2009 } else if (a.VsrD(1) < b.VsrD(1)) {
2010 return -1;
2011 } else if (a.VsrD(1) > b.VsrD(1)) {
2012 return 1;
2013 } else {
2014 return 0;
2015 }
2016 }
2017
2018 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2019 {
2020 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2021 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2022 (~a.VsrD(1) < b.VsrD(1));
2023 }
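/*
 * The carry out of the low 64-bit addition is (~a.VsrD(1) < b.VsrD(1)):
 * a.VsrD(1) + b.VsrD(1) wraps exactly when b.VsrD(1) is larger than the
 * distance from a.VsrD(1) to UINT64_MAX.
 */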
2024
2025 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2026 {
2027 ppc_avr_t not_a;
2028 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2029 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2030 (~a.VsrD(1) < b.VsrD(1));
2031 avr_qw_not(&not_a, a);
2032 return avr_qw_cmpu(not_a, b) < 0;
2033 }
2034
2035 #endif
2036
2037 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2038 {
2039 #ifdef CONFIG_INT128
2040 r->u128 = a->u128 + b->u128;
2041 #else
2042 avr_qw_add(r, *a, *b);
2043 #endif
2044 }
2045
2046 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2047 {
2048 #ifdef CONFIG_INT128
2049 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2050 #else
2051
2052 if (c->VsrD(1) & 1) {
2053 ppc_avr_t tmp;
2054
2055 tmp.VsrD(0) = 0;
2056 tmp.VsrD(1) = c->VsrD(1) & 1;
2057 avr_qw_add(&tmp, *a, tmp);
2058 avr_qw_add(r, tmp, *b);
2059 } else {
2060 avr_qw_add(r, *a, *b);
2061 }
2062 #endif
2063 }
2064
2065 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2066 {
2067 #ifdef CONFIG_INT128
2068 r->u128 = (~a->u128 < b->u128);
2069 #else
2070 ppc_avr_t not_a;
2071
2072 avr_qw_not(&not_a, *a);
2073
2074 r->VsrD(0) = 0;
2075 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2076 #endif
2077 }
2078
2079 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2080 {
2081 #ifdef CONFIG_INT128
2082 int carry_out = (~a->u128 < b->u128);
2083 if (!carry_out && (c->u128 & 1)) {
2084 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2085 ((a->u128 != 0) || (b->u128 != 0));
2086 }
2087 r->u128 = carry_out;
2088 #else
2089
2090 int carry_in = c->VsrD(1) & 1;
2091 int carry_out = 0;
2092 ppc_avr_t tmp;
2093
2094 carry_out = avr_qw_addc(&tmp, *a, *b);
2095
2096 if (!carry_out && carry_in) {
2097 ppc_avr_t one = QW_ONE;
2098 carry_out = avr_qw_addc(&tmp, tmp, one);
2099 }
2100 r->VsrD(0) = 0;
2101 r->VsrD(1) = carry_out;
2102 #endif
2103 }
2104
2105 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2106 {
2107 #ifdef CONFIG_INT128
2108 r->u128 = a->u128 - b->u128;
2109 #else
2110 ppc_avr_t tmp;
2111 ppc_avr_t one = QW_ONE;
2112
2113 avr_qw_not(&tmp, *b);
2114 avr_qw_add(&tmp, *a, tmp);
2115 avr_qw_add(r, tmp, one);
2116 #endif
2117 }
2118
2119 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2120 {
2121 #ifdef CONFIG_INT128
2122 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2123 #else
2124 ppc_avr_t tmp, sum;
2125
2126 avr_qw_not(&tmp, *b);
2127 avr_qw_add(&sum, *a, tmp);
2128
2129 tmp.VsrD(0) = 0;
2130 tmp.VsrD(1) = c->VsrD(1) & 1;
2131 avr_qw_add(r, sum, tmp);
2132 #endif
2133 }
2134
2135 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2136 {
2137 #ifdef CONFIG_INT128
2138 r->u128 = (~a->u128 < ~b->u128) ||
2139 (a->u128 + ~b->u128 == (__uint128_t)-1);
2140 #else
2141 int carry = (avr_qw_cmpu(*a, *b) > 0);
2142 if (!carry) {
2143 ppc_avr_t tmp;
2144 avr_qw_not(&tmp, *b);
2145 avr_qw_add(&tmp, *a, tmp);
2146 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2147 }
2148 r->VsrD(0) = 0;
2149 r->VsrD(1) = carry;
2150 #endif
2151 }
2152
2153 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2154 {
2155 #ifdef CONFIG_INT128
2156 r->u128 =
2157 (~a->u128 < ~b->u128) ||
2158 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2159 #else
2160 int carry_in = c->VsrD(1) & 1;
2161 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2162 if (!carry_out && carry_in) {
2163 ppc_avr_t tmp;
2164 avr_qw_not(&tmp, *b);
2165 avr_qw_add(&tmp, *a, tmp);
2166 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2167 }
2168
2169 r->VsrD(0) = 0;
2170 r->VsrD(1) = carry_out;
2171 #endif
2172 }
2173
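/*
 * Signed packed-decimal (BCD) helpers. A quadword holds 31 decimal
 * digits plus a sign code: digit n (1 = least significant, 31 = most
 * significant) lives in byte BCD_DIG_BYTE(n), in the high nibble when n
 * is odd and the low nibble when n is even, and the sign code occupies
 * the low nibble of digit position 0. For example, +123 with the
 * default preferred sign is stored with VsrB(14) = 0x12 and
 * VsrB(15) = 0x3C (digit 3 in the high nibble, sign code 0xC in the
 * low nibble), all other bytes zero. 0xA, 0xC, 0xE and 0xF are positive
 * sign codes, 0xB and 0xD are negative; anything else makes the operand
 * invalid.
 */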
2174 #define BCD_PLUS_PREF_1 0xC
2175 #define BCD_PLUS_PREF_2 0xF
2176 #define BCD_PLUS_ALT_1 0xA
2177 #define BCD_NEG_PREF 0xD
2178 #define BCD_NEG_ALT 0xB
2179 #define BCD_PLUS_ALT_2 0xE
2180 #define NATIONAL_PLUS 0x2B
2181 #define NATIONAL_NEG 0x2D
2182
2183 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2184
2185 static int bcd_get_sgn(ppc_avr_t *bcd)
2186 {
2187 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2188 case BCD_PLUS_PREF_1:
2189 case BCD_PLUS_PREF_2:
2190 case BCD_PLUS_ALT_1:
2191 case BCD_PLUS_ALT_2:
2192 {
2193 return 1;
2194 }
2195
2196 case BCD_NEG_PREF:
2197 case BCD_NEG_ALT:
2198 {
2199 return -1;
2200 }
2201
2202 default:
2203 {
2204 return 0;
2205 }
2206 }
2207 }
2208
2209 static int bcd_preferred_sgn(int sgn, int ps)
2210 {
2211 if (sgn >= 0) {
2212 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2213 } else {
2214 return BCD_NEG_PREF;
2215 }
2216 }
2217
2218 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2219 {
2220 uint8_t result;
2221 if (n & 1) {
2222 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2223 } else {
2224 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2225 }
2226
2227 if (unlikely(result > 9)) {
2228 *invalid = true;
2229 }
2230 return result;
2231 }
2232
2233 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2234 {
2235 if (n & 1) {
2236 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2237 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2238 } else {
2239 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2240 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2241 }
2242 }
2243
2244 static bool bcd_is_valid(ppc_avr_t *bcd)
2245 {
2246 int i;
2247 int invalid = 0;
2248
2249 if (bcd_get_sgn(bcd) == 0) {
2250 return false;
2251 }
2252
2253 for (i = 1; i < 32; i++) {
2254 bcd_get_digit(bcd, i, &invalid);
2255 if (unlikely(invalid)) {
2256 return false;
2257 }
2258 }
2259 return true;
2260 }
2261
2262 static int bcd_cmp_zero(ppc_avr_t *bcd)
2263 {
2264 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2265 return CRF_EQ;
2266 } else {
2267 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2268 }
2269 }
2270
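/*
 * "National" decimal format: eight 16-bit characters per quadword,
 * character n counted from the least significant halfword. Character 0
 * is the sign ('+' = 0x2B or '-' = 0x2D) and characters 1..7 are the
 * digits '0'..'9' (0x30..0x39), least significant digit first.
 */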
2271 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2272 {
2273 return reg->VsrH(7 - n);
2274 }
2275
2276 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2277 {
2278 reg->VsrH(7 - n) = val;
2279 }
2280
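/*
 * Magnitude helpers: bcd_cmp_mag() compares the 31-digit magnitudes,
 * while bcd_add_mag() and bcd_sub_mag() add/subtract them digit by
 * digit with decimal carry/borrow propagation. bcd_add_mag() also
 * reports whether the result is zero, and both report the final
 * carry/borrow through *overflow.
 */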
2281 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2282 {
2283 int i;
2284 int invalid = 0;
2285 for (i = 31; i > 0; i--) {
2286 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2287 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2288 if (unlikely(invalid)) {
2289 return 0; /* result is irrelevant; invalid digits are caught later */
2290 } else if (dig_a > dig_b) {
2291 return 1;
2292 } else if (dig_a < dig_b) {
2293 return -1;
2294 }
2295 }
2296
2297 return 0;
2298 }
2299
2300 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2301 int *overflow)
2302 {
2303 int carry = 0;
2304 int i;
2305 int is_zero = 1;
2306
2307 for (i = 1; i <= 31; i++) {
2308 uint8_t digit = bcd_get_digit(a, i, invalid) +
2309 bcd_get_digit(b, i, invalid) + carry;
2310 is_zero &= (digit == 0);
2311 if (digit > 9) {
2312 carry = 1;
2313 digit -= 10;
2314 } else {
2315 carry = 0;
2316 }
2317
2318 bcd_put_digit(t, digit, i);
2319 }
2320
2321 *overflow = carry;
2322 return is_zero;
2323 }
2324
2325 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2326 int *overflow)
2327 {
2328 int carry = 0;
2329 int i;
2330
2331 for (i = 1; i <= 31; i++) {
2332 uint8_t digit = bcd_get_digit(a, i, invalid) -
2333 bcd_get_digit(b, i, invalid) + carry;
2334 if (digit & 0x80) {
2335 carry = -1;
2336 digit += 10;
2337 } else {
2338 carry = 0;
2339 }
2340
2341 bcd_put_digit(t, digit, i);
2342 }
2343
2344 *overflow = carry;
2345 }
2346
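/*
 * bcdadd./bcdsub. - signed BCD add and subtract. Like signs add the
 * magnitudes; unlike signs subtract the smaller magnitude from the
 * larger and take the sign of the larger operand. The return value is
 * the CR6 field: LT/GT for a negative/positive result, EQ for zero, and
 * SO for an invalid operand (all nibbles forced to 0xF) or overflow.
 * ps selects the preferred positive sign code (0xC or 0xF).
 */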
2347 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2348 {
2349
2350 int sgna = bcd_get_sgn(a);
2351 int sgnb = bcd_get_sgn(b);
2352 int invalid = (sgna == 0) || (sgnb == 0);
2353 int overflow = 0;
2354 int zero = 0;
2355 uint32_t cr = 0;
2356 ppc_avr_t result = { .u64 = { 0, 0 } };
2357
2358 if (!invalid) {
2359 if (sgna == sgnb) {
2360 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2361 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2362 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2363 } else {
2364 int magnitude = bcd_cmp_mag(a, b);
2365 if (magnitude > 0) {
2366 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2367 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2368 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2369 } else if (magnitude < 0) {
2370 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2371 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2372 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2373 } else {
2374 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2375 cr = CRF_EQ;
2376 }
2377 }
2378 }
2379
2380 if (unlikely(invalid)) {
2381 result.VsrD(0) = result.VsrD(1) = -1;
2382 cr = CRF_SO;
2383 } else if (overflow) {
2384 cr |= CRF_SO;
2385 } else if (zero) {
2386 cr |= CRF_EQ;
2387 }
2388
2389 *r = result;
2390
2391 return cr;
2392 }
2393
2394 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2395 {
2396 ppc_avr_t bcopy = *b;
2397 int sgnb = bcd_get_sgn(b);
2398 if (sgnb < 0) {
2399 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2400 } else if (sgnb > 0) {
2401 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2402 }
2403 /* else invalid ... defer to bcdadd code for proper handling */
2404
2405 return helper_bcdadd(r, a, &bcopy, ps);
2406 }
2407
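/*
 * bcdcfn/bcdctn - convert between national decimal and signed BCD.
 * National format only carries 7 digits, so bcdctn reports SO (via
 * ox_flag) when any higher-order BCD digit of the source is non-zero,
 * and bcdcfn treats characters outside '0'..'9' or a bad sign character
 * as invalid.
 */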
2408 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2409 {
2410 int i;
2411 int cr = 0;
2412 uint16_t national = 0;
2413 uint16_t sgnb = get_national_digit(b, 0);
2414 ppc_avr_t ret = { .u64 = { 0, 0 } };
2415 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2416
2417 for (i = 1; i < 8; i++) {
2418 national = get_national_digit(b, i);
2419 if (unlikely(national < 0x30 || national > 0x39)) {
2420 invalid = 1;
2421 break;
2422 }
2423
2424 bcd_put_digit(&ret, national & 0xf, i);
2425 }
2426
2427 if (sgnb == NATIONAL_PLUS) {
2428 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2429 } else {
2430 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2431 }
2432
2433 cr = bcd_cmp_zero(&ret);
2434
2435 if (unlikely(invalid)) {
2436 cr = CRF_SO;
2437 }
2438
2439 *r = ret;
2440
2441 return cr;
2442 }
2443
2444 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2445 {
2446 int i;
2447 int cr = 0;
2448 int sgnb = bcd_get_sgn(b);
2449 int invalid = (sgnb == 0);
2450 ppc_avr_t ret = { .u64 = { 0, 0 } };
2451
2452 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2453
2454 for (i = 1; i < 8; i++) {
2455 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2456
2457 if (unlikely(invalid)) {
2458 break;
2459 }
2460 }
2461 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2462
2463 cr = bcd_cmp_zero(b);
2464
2465 if (ox_flag) {
2466 cr |= CRF_SO;
2467 }
2468
2469 if (unlikely(invalid)) {
2470 cr = CRF_SO;
2471 }
2472
2473 *r = ret;
2474
2475 return cr;
2476 }
2477
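/*
 * bcdcfz/bcdctz - convert between zoned decimal and signed BCD. Zoned
 * format stores one digit per byte with a zone nibble of 0x3 (ps = 0)
 * or 0xF (ps = 1) in the upper half; the sign is carried in the zone of
 * the least significant byte. Only 16 digits fit, so bcdctz reports SO
 * when the source has significant digits beyond the 16th.
 */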
2478 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2479 {
2480 int i;
2481 int cr = 0;
2482 int invalid = 0;
2483 int zone_digit = 0;
2484 int zone_lead = ps ? 0xF : 0x3;
2485 int digit = 0;
2486 ppc_avr_t ret = { .u64 = { 0, 0 } };
2487 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2488
2489 if (unlikely((sgnb < 0xA) && ps)) {
2490 invalid = 1;
2491 }
2492
2493 for (i = 0; i < 16; i++) {
2494 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2495 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2496 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2497 invalid = 1;
2498 break;
2499 }
2500
2501 bcd_put_digit(&ret, digit, i + 1);
2502 }
2503
2504 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2505 (!ps && (sgnb & 0x4))) {
2506 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2507 } else {
2508 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2509 }
2510
2511 cr = bcd_cmp_zero(&ret);
2512
2513 if (unlikely(invalid)) {
2514 cr = CRF_SO;
2515 }
2516
2517 *r = ret;
2518
2519 return cr;
2520 }
2521
2522 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2523 {
2524 int i;
2525 int cr = 0;
2526 uint8_t digit = 0;
2527 int sgnb = bcd_get_sgn(b);
2528 int zone_lead = (ps) ? 0xF0 : 0x30;
2529 int invalid = (sgnb == 0);
2530 ppc_avr_t ret = { .u64 = { 0, 0 } };
2531
2532 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2533
2534 for (i = 0; i < 16; i++) {
2535 digit = bcd_get_digit(b, i + 1, &invalid);
2536
2537 if (unlikely(invalid)) {
2538 break;
2539 }
2540
2541 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2542 }
2543
2544 if (ps) {
2545 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2546 } else {
2547 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2548 }
2549
2550 cr = bcd_cmp_zero(b);
2551
2552 if (ox_flag) {
2553 cr |= CRF_SO;
2554 }
2555
2556 if (unlikely(invalid)) {
2557 cr = CRF_SO;
2558 }
2559
2560 *r = ret;
2561
2562 return cr;
2563 }
2564
2565 /**
2566 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2567 *
2568 * Returns:
2569 * > 0 if ahi|alo > bhi|blo,
2570 * 0 if ahi|alo == bhi|blo,
2571 * < 0 if ahi|alo < bhi|blo
2572 */
2573 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2574 uint64_t blo, uint64_t bhi)
2575 {
2576 return (ahi == bhi) ?
2577 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2578 (ahi > bhi ? 1 : -1);
2579 }
2580
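/*
 * bcdcfsq - convert a signed 128-bit binary integer to BCD. After the
 * range check against 10^31 - 1 the magnitude is divided by 10^15: the
 * remainder supplies the 15 low digits and the quotient the 16 high
 * ones, so only 64-bit divisions by ten are needed in the digit loops.
 */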
2581 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2582 {
2583 int i;
2584 int cr;
2585 uint64_t lo_value;
2586 uint64_t hi_value;
2587 uint64_t rem;
2588 ppc_avr_t ret = { .u64 = { 0, 0 } };
2589
2590 if (b->VsrSD(0) < 0) {
2591 lo_value = -b->VsrSD(1);
2592 hi_value = ~b->VsrD(0) + !lo_value;
2593 bcd_put_digit(&ret, 0xD, 0);
2594
2595 cr = CRF_LT;
2596 } else {
2597 lo_value = b->VsrD(1);
2598 hi_value = b->VsrD(0);
2599 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2600
2601 if (hi_value == 0 && lo_value == 0) {
2602 cr = CRF_EQ;
2603 } else {
2604 cr = CRF_GT;
2605 }
2606 }
2607
2608 /*
2609 * Check src limits: abs(src) <= 10^31 - 1
2610 *
2611 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2612 */
2613 if (ucmp128(lo_value, hi_value,
2614 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2615 cr |= CRF_SO;
2616
2617 /*
2618 * According to the ISA, if src wouldn't fit in the destination
2619 * register, the result is undefined.
2620 * In that case, we leave r unchanged.
2621 */
2622 } else {
2623 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2624
2625 for (i = 1; i < 16; rem /= 10, i++) {
2626 bcd_put_digit(&ret, rem % 10, i);
2627 }
2628
2629 for (; i < 32; lo_value /= 10, i++) {
2630 bcd_put_digit(&ret, lo_value % 10, i);
2631 }
2632
2633 *r = ret;
2634 }
2635
2636 return cr;
2637 }
2638
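/*
 * bcdctsq - convert BCD to a signed 128-bit binary integer, taking the
 * digits most significant first: the 128-bit accumulator is multiplied
 * by ten (via mulu64) and the next digit added for each of the 31 digit
 * positions, then the result is negated if the sign code is negative.
 */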
2639 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2640 {
2641 uint8_t i;
2642 int cr;
2643 uint64_t carry;
2644 uint64_t unused;
2645 uint64_t lo_value;
2646 uint64_t hi_value = 0;
2647 int sgnb = bcd_get_sgn(b);
2648 int invalid = (sgnb == 0);
2649
2650 lo_value = bcd_get_digit(b, 31, &invalid);
2651 for (i = 30; i > 0; i--) {
2652 mulu64(&lo_value, &carry, lo_value, 10ULL);
2653 mulu64(&hi_value, &unused, hi_value, 10ULL);
2654 lo_value += bcd_get_digit(b, i, &invalid);
2655 hi_value += carry;
2656
2657 if (unlikely(invalid)) {
2658 break;
2659 }
2660 }
2661
2662 if (sgnb == -1) {
2663 r->VsrSD(1) = -lo_value;
2664 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2665 } else {
2666 r->VsrSD(1) = lo_value;
2667 r->VsrSD(0) = hi_value;
2668 }
2669
2670 cr = bcd_cmp_zero(b);
2671
2672 if (unlikely(invalid)) {
2673 cr = CRF_SO;
2674 }
2675
2676 return cr;
2677 }
2678
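/*
 * bcdcpsgn - copy the sign nibble of b onto the digits of a, after
 * validating both operands; bcdsetsgn below rewrites the sign of b with
 * the preferred code for its current sign.
 */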
2679 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2680 {
2681 int i;
2682 int invalid = 0;
2683
2684 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2685 return CRF_SO;
2686 }
2687
2688 *r = *a;
2689 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2690
2691 for (i = 1; i < 32; i++) {
2692 bcd_get_digit(a, i, &invalid);
2693 bcd_get_digit(b, i, &invalid);
2694 if (unlikely(invalid)) {
2695 return CRF_SO;
2696 }
2697 }
2698
2699 return bcd_cmp_zero(r);
2700 }
2701
2702 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2703 {
2704 int sgnb = bcd_get_sgn(b);
2705
2706 *r = *b;
2707 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2708
2709 if (bcd_is_valid(b) == false) {
2710 return CRF_SO;
2711 }
2712
2713 return bcd_cmp_zero(r);
2714 }
2715
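/*
 * Decimal shift helpers. The signed shift count comes from byte 7 of a
 * (VsrSB(7)) and is measured in digits, i.e. one nibble per step, with
 * positive counts shifting towards the most significant digit. bcds and
 * bcdsr clear the sign nibble, shift the digits and re-insert the
 * preferred sign afterwards; bcdus shifts the whole unsigned quadword;
 * bcdsr additionally rounds right shifts by adding one when the most
 * significant discarded digit is 5 or greater. Overflow detected by
 * ulshift() on left shifts is reported as SO.
 */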
2716 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2717 {
2718 int cr;
2719 int i = a->VsrSB(7);
2720 bool ox_flag = false;
2721 int sgnb = bcd_get_sgn(b);
2722 ppc_avr_t ret = *b;
2723 ret.VsrD(1) &= ~0xf;
2724
2725 if (bcd_is_valid(b) == false) {
2726 return CRF_SO;
2727 }
2728
2729 if (unlikely(i > 31)) {
2730 i = 31;
2731 } else if (unlikely(i < -31)) {
2732 i = -31;
2733 }
2734
2735 if (i > 0) {
2736 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2737 } else {
2738 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2739 }
2740 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2741
2742 *r = ret;
2743
2744 cr = bcd_cmp_zero(r);
2745 if (ox_flag) {
2746 cr |= CRF_SO;
2747 }
2748
2749 return cr;
2750 }
2751
2752 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2753 {
2754 int cr;
2755 int i;
2756 int invalid = 0;
2757 bool ox_flag = false;
2758 ppc_avr_t ret = *b;
2759
2760 for (i = 0; i < 32; i++) {
2761 bcd_get_digit(b, i, &invalid);
2762
2763 if (unlikely(invalid)) {
2764 return CRF_SO;
2765 }
2766 }
2767
2768 i = a->VsrSB(7);
2769 if (i >= 32) {
2770 ox_flag = true;
2771 ret.VsrD(1) = ret.VsrD(0) = 0;
2772 } else if (i <= -32) {
2773 ret.VsrD(1) = ret.VsrD(0) = 0;
2774 } else if (i > 0) {
2775 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2776 } else {
2777 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2778 }
2779 *r = ret;
2780
2781 cr = bcd_cmp_zero(r);
2782 if (ox_flag) {
2783 cr |= CRF_SO;
2784 }
2785
2786 return cr;
2787 }
2788
2789 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2790 {
2791 int cr;
2792 int unused = 0;
2793 int invalid = 0;
2794 bool ox_flag = false;
2795 int sgnb = bcd_get_sgn(b);
2796 ppc_avr_t ret = *b;
2797 ret.VsrD(1) &= ~0xf;
2798
2799 int i = a->VsrSB(7);
2800 ppc_avr_t bcd_one;
2801
2802 bcd_one.VsrD(0) = 0;
2803 bcd_one.VsrD(1) = 0x10;
2804
2805 if (bcd_is_valid(b) == false) {
2806 return CRF_SO;
2807 }
2808
2809 if (unlikely(i > 31)) {
2810 i = 31;
2811 } else if (unlikely(i < -31)) {
2812 i = -31;
2813 }
2814
2815 if (i > 0) {
2816 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2817 } else {
2818 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2819
2820 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2821 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2822 }
2823 }
2824 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2825
2826 cr = bcd_cmp_zero(&ret);
2827 if (ox_flag) {
2828 cr |= CRF_SO;
2829 }
2830 *r = ret;
2831
2832 return cr;
2833 }
2834
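/*
 * bcdtrunc/bcdutrunc - truncate a signed/unsigned BCD value to the
 * number of digits given in halfword 3 of a, clearing the discarded
 * high-order digits and reporting SO if any of them were non-zero. For
 * bcdtrunc the sign nibble is kept, so i counts one extra nibble.
 */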
2835 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2836 {
2837 uint64_t mask;
2838 uint32_t ox_flag = 0;
2839 int i = a->VsrSH(3) + 1;
2840 ppc_avr_t ret = *b;
2841
2842 if (bcd_is_valid(b) == false) {
2843 return CRF_SO;
2844 }
2845
2846 if (i > 16 && i < 32) {
2847 mask = (uint64_t)-1 >> (128 - i * 4);
2848 if (ret.VsrD(0) & ~mask) {
2849 ox_flag = CRF_SO;
2850 }
2851
2852 ret.VsrD(0) &= mask;
2853 } else if (i >= 0 && i <= 16) {
2854 mask = (uint64_t)-1 >> (64 - i * 4);
2855 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2856 ox_flag = CRF_SO;
2857 }
2858
2859 ret.VsrD(1) &= mask;
2860 ret.VsrD(0) = 0;
2861 }
2862 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2863 *r = ret;
2864
2865 return bcd_cmp_zero(&ret) | ox_flag;
2866 }
2867
2868 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2869 {
2870 int i;
2871 uint64_t mask;
2872 uint32_t ox_flag = 0;
2873 int invalid = 0;
2874 ppc_avr_t ret = *b;
2875
2876 for (i = 0; i < 32; i++) {
2877 bcd_get_digit(b, i, &invalid);
2878
2879 if (unlikely(invalid)) {
2880 return CRF_SO;
2881 }
2882 }
2883
2884 i = a->VsrSH(3);
2885 if (i > 16 && i < 33) {
2886 mask = (uint64_t)-1 >> (128 - i * 4);
2887 if (ret.VsrD(0) & ~mask) {
2888 ox_flag = CRF_SO;
2889 }
2890
2891 ret.VsrD(0) &= mask;
2892 } else if (i > 0 && i <= 16) {
2893 mask = (uint64_t)-1 >> (64 - i * 4);
2894 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2895 ox_flag = CRF_SO;
2896 }
2897
2898 ret.VsrD(1) &= mask;
2899 ret.VsrD(0) = 0;
2900 } else if (i == 0) {
2901 if (ret.VsrD(0) || ret.VsrD(1)) {
2902 ox_flag = CRF_SO;
2903 }
2904 ret.VsrD(0) = ret.VsrD(1) = 0;
2905 }
2906
2907 *r = ret;
2908 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2909 return ox_flag | CRF_EQ;
2910 }
2911
2912 return ox_flag | CRF_GT;
2913 }
2914
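/*
 * AES helpers. vsbox applies the AES S-box to every byte. vcipher is a
 * full encryption round: SubBytes, ShiftRows and MixColumns are folded
 * into the combined AES_Te tables and the result is XORed with the
 * round key in b; vcipherlast is the final round without MixColumns.
 * vncipher/vncipherlast are the corresponding inverse-cipher rounds,
 * built from AES_isbox and the AES_imc inverse MixColumns table.
 */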
2915 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2916 {
2917 int i;
2918 VECTOR_FOR_INORDER_I(i, u8) {
2919 r->u8[i] = AES_sbox[a->u8[i]];
2920 }
2921 }
2922
2923 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2924 {
2925 ppc_avr_t result;
2926 int i;
2927
2928 VECTOR_FOR_INORDER_I(i, u32) {
2929 result.VsrW(i) = b->VsrW(i) ^
2930 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2931 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2932 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2933 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2934 }
2935 *r = result;
2936 }
2937
2938 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2939 {
2940 ppc_avr_t result;
2941 int i;
2942
2943 VECTOR_FOR_INORDER_I(i, u8) {
2944 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2945 }
2946 *r = result;
2947 }
2948
2949 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2950 {
2951 /* This differs from what is written in ISA V2.07: the RTL there is
2952 * incorrect and will be fixed in V2.07B. */
2953 int i;
2954 ppc_avr_t tmp;
2955
2956 VECTOR_FOR_INORDER_I(i, u8) {
2957 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2958 }
2959
2960 VECTOR_FOR_INORDER_I(i, u32) {
2961 r->VsrW(i) =
2962 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2963 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2964 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2965 AES_imc[tmp.VsrB(4 * i + 3)][3];
2966 }
2967 }
2968
2969 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2970 {
2971 ppc_avr_t result;
2972 int i;
2973
2974 VECTOR_FOR_INORDER_I(i, u8) {
2975 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2976 }
2977 *r = result;
2978 }
2979
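/*
 * vshasigmaw/vshasigmad compute the SHA-256/SHA-512 sigma functions.
 * Bit 0x10 of st_six selects the upper-case Sigma (st = 1) or the
 * lower-case sigma (st = 0) variants, and the low four bits select,
 * one bit per element, sigma-0 or sigma-1; the rotate and shift
 * amounts are the FIPS 180-4 constants.
 */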
2980 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2981 {
2982 int st = (st_six & 0x10) != 0;
2983 int six = st_six & 0xF;
2984 int i;
2985
2986 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2987 if (st == 0) {
2988 if ((six & (0x8 >> i)) == 0) {
2989 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2990 ror32(a->VsrW(i), 18) ^
2991 (a->VsrW(i) >> 3);
2992 } else { /* six.bit[i] == 1 */
2993 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2994 ror32(a->VsrW(i), 19) ^
2995 (a->VsrW(i) >> 10);
2996 }
2997 } else { /* st == 1 */
2998 if ((six & (0x8 >> i)) == 0) {
2999 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3000 ror32(a->VsrW(i), 13) ^
3001 ror32(a->VsrW(i), 22);
3002 } else { /* six.bit[i] == 1 */
3003 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3004 ror32(a->VsrW(i), 11) ^
3005 ror32(a->VsrW(i), 25);
3006 }
3007 }
3008 }
3009 }
3010
3011 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3012 {
3013 int st = (st_six & 0x10) != 0;
3014 int six = st_six & 0xF;
3015 int i;
3016
3017 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3018 if (st == 0) {
3019 if ((six & (0x8 >> (2 * i))) == 0) {
3020 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3021 ror64(a->VsrD(i), 8) ^
3022 (a->VsrD(i) >> 7);
3023 } else { /* six.bit[2*i] == 1 */
3024 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3025 ror64(a->VsrD(i), 61) ^
3026 (a->VsrD(i) >> 6);
3027 }
3028 } else { /* st == 1 */
3029 if ((six & (0x8 >> (2 * i))) == 0) {
3030 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3031 ror64(a->VsrD(i), 34) ^
3032 ror64(a->VsrD(i), 39);
3033 } else { /* six.bit[2*i] == 1 */
3034 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3035 ror64(a->VsrD(i), 18) ^
3036 ror64(a->VsrD(i), 41);
3037 }
3038 }
3039 }
3040 }
3041
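/*
 * vpermxor: each result byte is the XOR of one byte selected from a by
 * the high nibble of the corresponding control byte in c and one byte
 * selected from b by its low nibble.
 */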
3042 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3043 {
3044 ppc_avr_t result;
3045 int i;
3046
3047 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3048 int indexA = c->VsrB(i) >> 4;
3049 int indexB = c->VsrB(i) & 0xF;
3050
3051 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3052 }
3053 *r = result;
3054 }
3055
3056 #undef VECTOR_FOR_INORDER_I
3057
3058 /*****************************************************************************/
3059 /* SPE extension helpers */
3060 /* Use a table to make this quicker */
3061 static const uint8_t hbrev[16] = {
3062 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3063 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3064 };
3065
3066 static inline uint8_t byte_reverse(uint8_t val)
3067 {
3068 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3069 }
3070
3071 static inline uint32_t word_reverse(uint32_t val)
3072 {
3073 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3074 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3075 }
3076
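/*
 * brinc - bit-reversed increment, used for FFT-style (bit-reversed)
 * addressing in SPE code: the low MASKBITS bits of arg1 are incremented
 * as if their bit order were reversed, under the bit mask supplied in
 * arg2, while the bits above the mask window pass through unchanged.
 */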
3077 #define MASKBITS 16 /* Arbitrary value; implementation dependent, still to be fixed */
3078 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3079 {
3080 uint32_t a, b, d, mask;
3081
3082 mask = UINT32_MAX >> (32 - MASKBITS);
3083 a = arg1 & mask;
3084 b = arg2 & mask;
3085 d = word_reverse(1 + word_reverse(a | ~b));
3086 return (arg1 & ~mask) | (d & b);
3087 }
3088
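/*
 * cntlsw32 counts the leading bits of a 32-bit value that are equal to
 * its sign bit; cntlzw32 counts leading zeros.
 */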
3089 uint32_t helper_cntlsw32(uint32_t val)
3090 {
3091 if (val & 0x80000000) {
3092 return clz32(~val);
3093 } else {
3094 return clz32(val);
3095 }
3096 }
3097
3098 uint32_t helper_cntlzw32(uint32_t val)
3099 {
3100 return clz32(val);
3101 }
3102
3103 /* 440 specific */
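/*
 * dlmzb - Determine Leftmost Zero Byte in the 8-byte string formed by
 * high:low. The 1-based index of the first zero byte (or 8 if there is
 * none) is written to the low bits of XER and returned; with Rc = 1,
 * CR0 records whether the zero byte was found in the high word
 * (0b0100), the low word (0b1000), or not at all (0b0010), plus SO.
 */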
3104 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3105 target_ulong low, uint32_t update_Rc)
3106 {
3107 target_ulong mask;
3108 int i;
3109
3110 i = 1;
3111 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3112 if ((high & mask) == 0) {
3113 if (update_Rc) {
3114 env->crf[0] = 0x4;
3115 }
3116 goto done;
3117 }
3118 i++;
3119 }
3120 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3121 if ((low & mask) == 0) {
3122 if (update_Rc) {
3123 env->crf[0] = 0x8;
3124 }
3125 goto done;
3126 }
3127 i++;
3128 }
3129 i = 8;
3130 if (update_Rc) {
3131 env->crf[0] = 0x2;
3132 }
3133 done:
3134 env->xer = (env->xer & ~0x7F) | i;
3135 if (update_Rc) {
3136 env->crf[0] |= xer_so;
3137 }
3138 return i;
3139 }