1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
35
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 {
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
42 }
43 }
44
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
47 {
48 uint64_t rt = 0;
49 int overflow = 0;
50
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
53
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
59 }
60
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
63 }
64
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
67 }
68
69 return (target_ulong)rt;
70 }
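/*
 * Worked example for divweu (values chosen purely for illustration):
 *
 *     helper_divweu(env, 1, 2, 0) == 0x80000000   (0x1_0000_0000 / 2)
 *     helper_divweu(env, 1, 0, 0) == 0            (divisor 0: result is
 *                                                   undefined, 0 returned here)
 */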
71
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
74 {
75 int64_t rt = 0;
76 int overflow = 0;
77
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
80
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
87 }
88
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
91 }
92
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
95 }
96
97 return (target_ulong)rt;
98 }
99
100 #if defined(TARGET_PPC64)
101
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 {
104 uint64_t rt = 0;
105 int overflow = 0;
106
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
112 }
113
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
116 }
117
118 return rt;
119 }
120
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 {
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
127
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
133 }
134
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
137 }
138
139 return rt;
140 }
141
142 #endif
143
144
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
148
149 /*
150  * Subtract 1 from each byte, AND the result with the inverse of the
151  * original value, and check whether the MSB is set in each byte.
152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
154 */
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
156
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
159
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
161 {
162 return hasvalue(rb, ra) ? CRF_GT : 0;
163 }
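/*
 * Worked example (illustrative values): with ra = 0x2a,
 * pattern(0x2a) = 0x2a2a2a2a2a2a2a2a, so rb ^ pattern(ra) has a zero byte
 * exactly where rb contains the byte value 0x2a.
 *
 *     helper_cmpeqb(0x2a, 0x0011002a00000000) == CRF_GT   (byte found)
 *     helper_cmpeqb(0x2a, 0x0011000000000000) == 0        (no match)
 */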
164
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
168
169 /*
170 * Return a random number.
171 */
172 uint64_t helper_darn32(void)
173 {
174 Error *err = NULL;
175 uint32_t ret;
176
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
182 }
183
184 return ret;
185 }
186
187 uint64_t helper_darn64(void)
188 {
189 Error *err = NULL;
190 uint64_t ret;
191
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
197 }
198
199 return ret;
200 }
201
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
203 {
204 int i;
205 uint64_t ra = 0;
206
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
212 }
213 }
214 }
215 return ra;
216 }
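/*
 * Worked example (illustrative values): each byte of rs selects one bit of
 * rb, numbered MSB-first as in PPC_BIT().  With rs = 0x0706050403020100 the
 * eight bytes select bits 0..7 of rb, so for rb = 0xff00000000000000 every
 * selected bit is set and helper_bpermd() returns 0xff.
 */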
217
218 #endif
219
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
221 {
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
225
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
229 }
230 mask <<= 8;
231 }
232 return ra;
233 }
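/*
 * Worked example (illustrative, 64-bit target_ulong): each result byte is
 * 0xff where the corresponding bytes of rs and rb are equal:
 *
 *     helper_cmpb(0x1122334411223344, 0x11ff334411223344) == 0xff00ffffffffffff
 */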
234
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
238 {
239 int32_t ret;
240
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
249 }
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
253 }
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
257 }
258 return (target_long)ret;
259 }
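/*
 * Worked example (illustrative values): shifting -11 right by 2 gives -3 and
 * the two bits shifted out (0b01) are non-zero, so the carry is set:
 *
 *     helper_sraw(env, 0xfffffff5, 2)  ->  -3, with env->ca = env->ca32 = 1
 *     helper_sraw(env, 0x0000000c, 2)  ->   3, with env->ca = env->ca32 = 0
 */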
260
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
264 {
265 int64_t ret;
266
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
275 }
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
279 }
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
283 }
284 return ret;
285 }
286 #endif
287
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
299 }
300
301 target_ulong helper_popcntw(target_ulong val)
302 {
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
315 }
316 #else
317 target_ulong helper_popcntb(target_ulong val)
318 {
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
324 }
325 #endif
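/*
 * Worked example (illustrative value): popcntb counts the set bits of each
 * byte independently, e.g. for the low word 0x0103070f the per-byte counts
 * are 1, 2, 3 and 4:
 *
 *     helper_popcntb(0x0103070f) == 0x01020304
 */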
326
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
328 {
329 /*
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
333 * ctz or cto, we negate the mask at the end of the loop.
334 */
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
338
339 if (mask == 0 || mask == -1) {
340 return src;
341 }
342
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
349 }
350
351 /*
352 * Extracts 'n' trailing bits of src and puts them into the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
355 */
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
361 }
362
363 /*
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
368 */
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
374 }
375
376 /*
377 * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
378 * we'll shift it another 64 - ctpop(mask) times.
379 */
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
384 }
385
386 return left | (right >> n);
387 }
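/*
 * Worked example (illustrative values): the src bits under the 1-bits of the
 * mask are gathered at the low end of the result, the remaining bits are
 * packed above them, both groups keeping their original order:
 *
 *     helper_CFUGED(0xac, 0xf0) == 0xca
 *
 * (src bits 7:4 = 0xa end up in result bits 3:0, src bits 3:0 = 0xc in
 * result bits 7:4.)
 */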
388
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
390 {
391 int i, o;
392 uint64_t result = 0;
393
394 if (mask == -1) {
395 return src;
396 }
397
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
402 }
403
404 return result;
405 }
406
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
408 {
409 int i, o;
410 uint64_t result = 0;
411
412 if (mask == -1) {
413 return src;
414 }
415
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
420 }
421
422 return result;
423 }
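/*
 * Worked example (illustrative values): PDEPD scatters the low-order bits of
 * src into the 1-bit positions of mask, and PEXTD is its inverse:
 *
 *     helper_PDEPD(0xab, 0x0f0f) == 0x0a0b
 *     helper_PEXTD(0x0a0b, 0x0f0f) == 0xab
 */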
424
425 /*****************************************************************************/
426 /* Altivec extension helpers */
427 #if defined(HOST_WORDS_BIGENDIAN)
428 #define VECTOR_FOR_INORDER_I(index, element) \
429 for (index = 0; index < ARRAY_SIZE(r->element); index++)
430 #else
431 #define VECTOR_FOR_INORDER_I(index, element) \
432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
433 #endif
434
435 /* Saturating arithmetic helpers. */
436 #define SATCVT(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
438 { \
439 to_type r; \
440 \
441 if (x < (from_type)min) { \
442 r = min; \
443 *sat = 1; \
444 } else if (x > (from_type)max) { \
445 r = max; \
446 *sat = 1; \
447 } else { \
448 r = x; \
449 } \
450 return r; \
451 }
452 #define SATCVTU(from, to, from_type, to_type, min, max) \
453 static inline to_type cvt##from##to(from_type x, int *sat) \
454 { \
455 to_type r; \
456 \
457 if (x > (from_type)max) { \
458 r = max; \
459 *sat = 1; \
460 } else { \
461 r = x; \
462 } \
463 return r; \
464 }
465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
468
469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
475 #undef SATCVT
476 #undef SATCVTU
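/*
 * Worked examples for the generated converters (illustrative values):
 *
 *     cvtsdsw(0x100000000, &sat)  ->  INT32_MAX, *sat set (too large)
 *     cvtshub(-5, &sat)           ->  0,         *sat set (negative input)
 *     cvtsdsw(42, &sat)           ->  42,        *sat untouched
 */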
477
478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
479 {
480 ppc_store_vscr(env, vscr);
481 }
482
483 uint32_t helper_mfvscr(CPUPPCState *env)
484 {
485 return ppc_get_vscr(env);
486 }
487
488 static inline void set_vscr_sat(CPUPPCState *env)
489 {
490 /* The choice of non-zero value is arbitrary. */
491 env->vscr_sat.u32[0] = 1;
492 }
493
494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
495 {
496 int i;
497
498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
499 r->u32[i] = ~a->u32[i] < b->u32[i];
500 }
501 }
502
503 /* vprtybw */
504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
505 {
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
509 res ^= res >> 8;
510 r->u32[i] = res & 1;
511 }
512 }
513
514 /* vprtybd */
515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
516 {
517 int i;
518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->u64[i] = res & 1;
523 }
524 }
525
526 /* vprtybq */
527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
528 {
529 uint64_t res = b->u64[0] ^ b->u64[1];
530 res ^= res >> 32;
531 res ^= res >> 16;
532 res ^= res >> 8;
533 r->VsrD(1) = res & 1;
534 r->VsrD(0) = 0;
535 }
536
537 #define VARITHFP(suffix, func) \
538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
540 { \
541 int i; \
542 \
543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
545 } \
546 }
547 VARITHFP(addfp, float32_add)
548 VARITHFP(subfp, float32_sub)
549 VARITHFP(minfp, float32_min)
550 VARITHFP(maxfp, float32_max)
551 #undef VARITHFP
552
553 #define VARITHFPFMA(suffix, type) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b, ppc_avr_t *c) \
556 { \
557 int i; \
558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
560 type, &env->vec_status); \
561 } \
562 }
563 VARITHFPFMA(maddfp, 0);
564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
565 #undef VARITHFPFMA
566
567 #define VARITHSAT_CASE(type, op, cvt, element) \
568 { \
569 type result = (type)a->element[i] op (type)b->element[i]; \
570 r->element[i] = cvt(result, &sat); \
571 }
572
573 #define VARITHSAT_DO(name, op, optype, cvt, element) \
574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
576 { \
577 int sat = 0; \
578 int i; \
579 \
580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
581 VARITHSAT_CASE(optype, op, cvt, element); \
582 } \
583 if (sat) { \
584 vscr_sat->u32[0] = 1; \
585 } \
586 }
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
600 #undef VARITHSAT_DO
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
603
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
606 { \
607 int i; \
608 \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
612 } \
613 }
614
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
616 unsigned_type) \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
622 #undef VAVG_DO
623 #undef VAVG
624
625 #define VABSDU_DO(name, element) \
626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
627 { \
628 int i; \
629 \
630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
631 r->element[i] = (a->element[i] > b->element[i]) ? \
632 (a->element[i] - b->element[i]) : \
633 (b->element[i] - a->element[i]); \
634 } \
635 }
636
637 /*
638 * VABSDU - Vector absolute difference unsigned
639 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
640 * element - element type to access from vector
641 */
642 #define VABSDU(type, element) \
643 VABSDU_DO(absdu##type, element)
644 VABSDU(b, u8)
645 VABSDU(h, u16)
646 VABSDU(w, u32)
647 #undef VABSDU_DO
648 #undef VABSDU
649
650 #define VCF(suffix, cvt, element) \
651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
652 ppc_avr_t *b, uint32_t uim) \
653 { \
654 int i; \
655 \
656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
657 float32 t = cvt(b->element[i], &env->vec_status); \
658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
659 } \
660 }
661 VCF(ux, uint32_to_float32, u32)
662 VCF(sx, int32_to_float32, s32)
663 #undef VCF
664
665 #define VCMP_DO(suffix, compare, element, record) \
666 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
667 ppc_avr_t *a, ppc_avr_t *b) \
668 { \
669 uint64_t ones = (uint64_t)-1; \
670 uint64_t all = ones; \
671 uint64_t none = 0; \
672 int i; \
673 \
674 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
675 uint64_t result = (a->element[i] compare b->element[i] ? \
676 ones : 0x0); \
677 switch (sizeof(a->element[0])) { \
678 case 8: \
679 r->u64[i] = result; \
680 break; \
681 case 4: \
682 r->u32[i] = result; \
683 break; \
684 case 2: \
685 r->u16[i] = result; \
686 break; \
687 case 1: \
688 r->u8[i] = result; \
689 break; \
690 } \
691 all &= result; \
692 none |= result; \
693 } \
694 if (record) { \
695 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
696 } \
697 }
698 #define VCMP(suffix, compare, element) \
699 VCMP_DO(suffix, compare, element, 0) \
700 VCMP_DO(suffix##_dot, compare, element, 1)
701 VCMP(equb, ==, u8)
702 VCMP(equh, ==, u16)
703 VCMP(equw, ==, u32)
704 VCMP(equd, ==, u64)
705 VCMP(gtub, >, u8)
706 VCMP(gtuh, >, u16)
707 VCMP(gtuw, >, u32)
708 VCMP(gtud, >, u64)
709 VCMP(gtsb, >, s8)
710 VCMP(gtsh, >, s16)
711 VCMP(gtsw, >, s32)
712 VCMP(gtsd, >, s64)
713 #undef VCMP_DO
714 #undef VCMP
715
716 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
717 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
718 ppc_avr_t *a, ppc_avr_t *b) \
719 { \
720 etype ones = (etype)-1; \
721 etype all = ones; \
722 etype result, none = 0; \
723 int i; \
724 \
725 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
726 if (cmpzero) { \
727 result = ((a->element[i] == 0) \
728 || (b->element[i] == 0) \
729 || (a->element[i] != b->element[i]) ? \
730 ones : 0x0); \
731 } else { \
732 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
733 } \
734 r->element[i] = result; \
735 all &= result; \
736 none |= result; \
737 } \
738 if (record) { \
739 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
740 } \
741 }
742
743 /*
744 * VCMPNEZ - Vector compare not equal to zero
745 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
746 * element - element type to access from vector
747 */
748 #define VCMPNE(suffix, element, etype, cmpzero) \
749 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
750 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
751 VCMPNE(zb, u8, uint8_t, 1)
752 VCMPNE(zh, u16, uint16_t, 1)
753 VCMPNE(zw, u32, uint32_t, 1)
754 VCMPNE(b, u8, uint8_t, 0)
755 VCMPNE(h, u16, uint16_t, 0)
756 VCMPNE(w, u32, uint32_t, 0)
757 #undef VCMPNE_DO
758 #undef VCMPNE
759
760 #define VCMPFP_DO(suffix, compare, order, record) \
761 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
762 ppc_avr_t *a, ppc_avr_t *b) \
763 { \
764 uint32_t ones = (uint32_t)-1; \
765 uint32_t all = ones; \
766 uint32_t none = 0; \
767 int i; \
768 \
769 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
770 uint32_t result; \
771 FloatRelation rel = \
772 float32_compare_quiet(a->f32[i], b->f32[i], \
773 &env->vec_status); \
774 if (rel == float_relation_unordered) { \
775 result = 0; \
776 } else if (rel compare order) { \
777 result = ones; \
778 } else { \
779 result = 0; \
780 } \
781 r->u32[i] = result; \
782 all &= result; \
783 none |= result; \
784 } \
785 if (record) { \
786 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
787 } \
788 }
789 #define VCMPFP(suffix, compare, order) \
790 VCMPFP_DO(suffix, compare, order, 0) \
791 VCMPFP_DO(suffix##_dot, compare, order, 1)
792 VCMPFP(eqfp, ==, float_relation_equal)
793 VCMPFP(gefp, !=, float_relation_less)
794 VCMPFP(gtfp, ==, float_relation_greater)
795 #undef VCMPFP_DO
796 #undef VCMPFP
797
798 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
799 ppc_avr_t *a, ppc_avr_t *b, int record)
800 {
801 int i;
802 int all_in = 0;
803
804 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
805 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
806 &env->vec_status);
807 if (le_rel == float_relation_unordered) {
808 r->u32[i] = 0xc0000000;
809 all_in = 1;
810 } else {
811 float32 bneg = float32_chs(b->f32[i]);
812 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
813 &env->vec_status);
814 int le = le_rel != float_relation_greater;
815 int ge = ge_rel != float_relation_less;
816
817 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
818 all_in |= (!le | !ge);
819 }
820 }
821 if (record) {
822 env->crf[6] = (all_in == 0) << 1;
823 }
824 }
825
826 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
827 {
828 vcmpbfp_internal(env, r, a, b, 0);
829 }
830
831 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
832 ppc_avr_t *b)
833 {
834 vcmpbfp_internal(env, r, a, b, 1);
835 }
836
837 #define VCT(suffix, satcvt, element) \
838 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
839 ppc_avr_t *b, uint32_t uim) \
840 { \
841 int i; \
842 int sat = 0; \
843 float_status s = env->vec_status; \
844 \
845 set_float_rounding_mode(float_round_to_zero, &s); \
846 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
847 if (float32_is_any_nan(b->f32[i])) { \
848 r->element[i] = 0; \
849 } else { \
850 float64 t = float32_to_float64(b->f32[i], &s); \
851 int64_t j; \
852 \
853 t = float64_scalbn(t, uim, &s); \
854 j = float64_to_int64(t, &s); \
855 r->element[i] = satcvt(j, &sat); \
856 } \
857 } \
858 if (sat) { \
859 set_vscr_sat(env); \
860 } \
861 }
862 VCT(uxs, cvtsduw, u32)
863 VCT(sxs, cvtsdsw, s32)
864 #undef VCT
865
866 target_ulong helper_vclzlsbb(ppc_avr_t *r)
867 {
868 target_ulong count = 0;
869 int i;
870 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
871 if (r->VsrB(i) & 0x01) {
872 break;
873 }
874 count++;
875 }
876 return count;
877 }
878
879 target_ulong helper_vctzlsbb(ppc_avr_t *r)
880 {
881 target_ulong count = 0;
882 int i;
883 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
884 if (r->VsrB(i) & 0x01) {
885 break;
886 }
887 count++;
888 }
889 return count;
890 }
891
892 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
893 ppc_avr_t *b, ppc_avr_t *c)
894 {
895 int sat = 0;
896 int i;
897
898 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
899 int32_t prod = a->s16[i] * b->s16[i];
900 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
901
902 r->s16[i] = cvtswsh(t, &sat);
903 }
904
905 if (sat) {
906 set_vscr_sat(env);
907 }
908 }
909
910 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
911 ppc_avr_t *b, ppc_avr_t *c)
912 {
913 int sat = 0;
914 int i;
915
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
918 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
919 r->s16[i] = cvtswsh(t, &sat);
920 }
921
922 if (sat) {
923 set_vscr_sat(env);
924 }
925 }
926
927 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
928 {
929 int i;
930
931 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
932 int32_t prod = a->s16[i] * b->s16[i];
933 r->s16[i] = (int16_t) (prod + c->s16[i]);
934 }
935 }
936
937 #define VMRG_DO(name, element, access, ofs) \
938 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
939 { \
940 ppc_avr_t result; \
941 int i, half = ARRAY_SIZE(r->element) / 2; \
942 \
943 for (i = 0; i < half; i++) { \
944 result.access(i * 2 + 0) = a->access(i + ofs); \
945 result.access(i * 2 + 1) = b->access(i + ofs); \
946 } \
947 *r = result; \
948 }
949
950 #define VMRG(suffix, element, access) \
951 VMRG_DO(mrgl##suffix, element, access, half) \
952 VMRG_DO(mrgh##suffix, element, access, 0)
953 VMRG(b, u8, VsrB)
954 VMRG(h, u16, VsrH)
955 VMRG(w, u32, VsrW)
956 #undef VMRG_DO
957 #undef VMRG
958
959 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
960 ppc_avr_t *b, ppc_avr_t *c)
961 {
962 int32_t prod[16];
963 int i;
964
965 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
966 prod[i] = (int32_t)a->s8[i] * b->u8[i];
967 }
968
969 VECTOR_FOR_INORDER_I(i, s32) {
970 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
971 prod[4 * i + 2] + prod[4 * i + 3];
972 }
973 }
974
975 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
976 ppc_avr_t *b, ppc_avr_t *c)
977 {
978 int32_t prod[8];
979 int i;
980
981 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
982 prod[i] = a->s16[i] * b->s16[i];
983 }
984
985 VECTOR_FOR_INORDER_I(i, s32) {
986 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
987 }
988 }
989
990 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
991 ppc_avr_t *b, ppc_avr_t *c)
992 {
993 int32_t prod[8];
994 int i;
995 int sat = 0;
996
997 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
998 prod[i] = (int32_t)a->s16[i] * b->s16[i];
999 }
1000
1001 VECTOR_FOR_INORDER_I(i, s32) {
1002 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1003
1004 r->u32[i] = cvtsdsw(t, &sat);
1005 }
1006
1007 if (sat) {
1008 set_vscr_sat(env);
1009 }
1010 }
1011
1012 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1013 ppc_avr_t *b, ppc_avr_t *c)
1014 {
1015 uint16_t prod[16];
1016 int i;
1017
1018 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1019 prod[i] = a->u8[i] * b->u8[i];
1020 }
1021
1022 VECTOR_FOR_INORDER_I(i, u32) {
1023 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1024 prod[4 * i + 2] + prod[4 * i + 3];
1025 }
1026 }
1027
1028 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1029 ppc_avr_t *b, ppc_avr_t *c)
1030 {
1031 uint32_t prod[8];
1032 int i;
1033
1034 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1035 prod[i] = a->u16[i] * b->u16[i];
1036 }
1037
1038 VECTOR_FOR_INORDER_I(i, u32) {
1039 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1040 }
1041 }
1042
1043 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1044 ppc_avr_t *b, ppc_avr_t *c)
1045 {
1046 uint32_t prod[8];
1047 int i;
1048 int sat = 0;
1049
1050 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1051 prod[i] = a->u16[i] * b->u16[i];
1052 }
1053
1054 VECTOR_FOR_INORDER_I(i, s32) {
1055 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1056
1057 r->u32[i] = cvtuduw(t, &sat);
1058 }
1059
1060 if (sat) {
1061 set_vscr_sat(env);
1062 }
1063 }
1064
1065 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1066 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1067 { \
1068 int i; \
1069 \
1070 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1071 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1072 (cast)b->mul_access(i); \
1073 } \
1074 }
1075
1076 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1077 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1078 { \
1079 int i; \
1080 \
1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1082 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1083 (cast)b->mul_access(i + 1); \
1084 } \
1085 }
1086
1087 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1088 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1089 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1090 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1091 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1092 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1093 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1094 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1095 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1096 #undef VMUL_DO_EVN
1097 #undef VMUL_DO_ODD
1098 #undef VMUL
1099
1100 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1101 ppc_avr_t *c)
1102 {
1103 ppc_avr_t result;
1104 int i;
1105
1106 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1107 int s = c->VsrB(i) & 0x1f;
1108 int index = s & 0xf;
1109
1110 if (s & 0x10) {
1111 result.VsrB(i) = b->VsrB(index);
1112 } else {
1113 result.VsrB(i) = a->VsrB(index);
1114 }
1115 }
1116 *r = result;
1117 }
1118
1119 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1120 ppc_avr_t *c)
1121 {
1122 ppc_avr_t result;
1123 int i;
1124
1125 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1126 int s = c->VsrB(i) & 0x1f;
1127 int index = 15 - (s & 0xf);
1128
1129 if (s & 0x10) {
1130 result.VsrB(i) = a->VsrB(index);
1131 } else {
1132 result.VsrB(i) = b->VsrB(index);
1133 }
1134 }
1135 *r = result;
1136 }
1137
1138 #if defined(HOST_WORDS_BIGENDIAN)
1139 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1140 #define VBPERMD_INDEX(i) (i)
1141 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1142 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1143 #else
1144 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1145 #define VBPERMD_INDEX(i) (1 - i)
1146 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1147 #define EXTRACT_BIT(avr, i, index) \
1148 (extract64((avr)->u64[1 - i], 63 - index, 1))
1149 #endif
1150
1151 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1152 {
1153 int i, j;
1154 ppc_avr_t result = { .u64 = { 0, 0 } };
1155 VECTOR_FOR_INORDER_I(i, u64) {
1156 for (j = 0; j < 8; j++) {
1157 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1158 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1159 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1160 }
1161 }
1162 }
1163 *r = result;
1164 }
1165
1166 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1167 {
1168 int i;
1169 uint64_t perm = 0;
1170
1171 VECTOR_FOR_INORDER_I(i, u8) {
1172 int index = VBPERMQ_INDEX(b, i);
1173
1174 if (index < 128) {
1175 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1176 if (a->u64[VBPERMQ_DW(index)] & mask) {
1177 perm |= (0x8000 >> i);
1178 }
1179 }
1180 }
1181
1182 r->VsrD(0) = perm;
1183 r->VsrD(1) = 0;
1184 }
1185
1186 #undef VBPERMQ_INDEX
1187 #undef VBPERMQ_DW
1188
1189 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1190 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1191 { \
1192 int i, j; \
1193 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1194 \
1195 VECTOR_FOR_INORDER_I(i, srcfld) { \
1196 prod[i] = 0; \
1197 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1198 if (a->srcfld[i] & (1ull << j)) { \
1199 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1200 } \
1201 } \
1202 } \
1203 \
1204 VECTOR_FOR_INORDER_I(i, trgfld) { \
1205 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1206 } \
1207 }
1208
1209 PMSUM(vpmsumb, u8, u16, uint16_t)
1210 PMSUM(vpmsumh, u16, u32, uint32_t)
1211 PMSUM(vpmsumw, u32, u64, uint64_t)
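/*
 * Worked example for one vpmsumb lane (illustrative values): the carry-less
 * (GF(2)) product of the bytes 0x03 and 0x05 is
 * (0x05 << 0) ^ (0x05 << 1) = 0x0f; each halfword of the result is the XOR
 * of two such adjacent byte products.
 */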
1212
1213 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1214 {
1215
1216 #ifdef CONFIG_INT128
1217 int i, j;
1218 __uint128_t prod[2];
1219
1220 VECTOR_FOR_INORDER_I(i, u64) {
1221 prod[i] = 0;
1222 for (j = 0; j < 64; j++) {
1223 if (a->u64[i] & (1ull << j)) {
1224 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1225 }
1226 }
1227 }
1228
1229 r->u128 = prod[0] ^ prod[1];
1230
1231 #else
1232 int i, j;
1233 ppc_avr_t prod[2];
1234
1235 VECTOR_FOR_INORDER_I(i, u64) {
1236 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1237 for (j = 0; j < 64; j++) {
1238 if (a->u64[i] & (1ull << j)) {
1239 ppc_avr_t bshift;
1240 if (j == 0) {
1241 bshift.VsrD(0) = 0;
1242 bshift.VsrD(1) = b->u64[i];
1243 } else {
1244 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1245 bshift.VsrD(1) = b->u64[i] << j;
1246 }
1247 prod[i].VsrD(1) ^= bshift.VsrD(1);
1248 prod[i].VsrD(0) ^= bshift.VsrD(0);
1249 }
1250 }
1251 }
1252
1253 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1254 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1255 #endif
1256 }
1257
1258
1259 #if defined(HOST_WORDS_BIGENDIAN)
1260 #define PKBIG 1
1261 #else
1262 #define PKBIG 0
1263 #endif
1264 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1265 {
1266 int i, j;
1267 ppc_avr_t result;
1268 #if defined(HOST_WORDS_BIGENDIAN)
1269 const ppc_avr_t *x[2] = { a, b };
1270 #else
1271 const ppc_avr_t *x[2] = { b, a };
1272 #endif
1273
1274 VECTOR_FOR_INORDER_I(i, u64) {
1275 VECTOR_FOR_INORDER_I(j, u32) {
1276 uint32_t e = x[i]->u32[j];
1277
1278 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1279 ((e >> 6) & 0x3e0) |
1280 ((e >> 3) & 0x1f));
1281 }
1282 }
1283 *r = result;
1284 }
1285
1286 #define VPK(suffix, from, to, cvt, dosat) \
1287 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1288 ppc_avr_t *a, ppc_avr_t *b) \
1289 { \
1290 int i; \
1291 int sat = 0; \
1292 ppc_avr_t result; \
1293 ppc_avr_t *a0 = PKBIG ? a : b; \
1294 ppc_avr_t *a1 = PKBIG ? b : a; \
1295 \
1296 VECTOR_FOR_INORDER_I(i, from) { \
1297 result.to[i] = cvt(a0->from[i], &sat); \
1298 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1299 } \
1300 *r = result; \
1301 if (dosat && sat) { \
1302 set_vscr_sat(env); \
1303 } \
1304 }
1305 #define I(x, y) (x)
1306 VPK(shss, s16, s8, cvtshsb, 1)
1307 VPK(shus, s16, u8, cvtshub, 1)
1308 VPK(swss, s32, s16, cvtswsh, 1)
1309 VPK(swus, s32, u16, cvtswuh, 1)
1310 VPK(sdss, s64, s32, cvtsdsw, 1)
1311 VPK(sdus, s64, u32, cvtsduw, 1)
1312 VPK(uhus, u16, u8, cvtuhub, 1)
1313 VPK(uwus, u32, u16, cvtuwuh, 1)
1314 VPK(udus, u64, u32, cvtuduw, 1)
1315 VPK(uhum, u16, u8, I, 0)
1316 VPK(uwum, u32, u16, I, 0)
1317 VPK(udum, u64, u32, I, 0)
1318 #undef I
1319 #undef VPK
1320 #undef PKBIG
1321
1322 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1323 {
1324 int i;
1325
1326 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1327 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1328 }
1329 }
1330
1331 #define VRFI(suffix, rounding) \
1332 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1333 ppc_avr_t *b) \
1334 { \
1335 int i; \
1336 float_status s = env->vec_status; \
1337 \
1338 set_float_rounding_mode(rounding, &s); \
1339 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1340 r->f32[i] = float32_round_to_int(b->f32[i], &s); \
1341 } \
1342 }
1343 VRFI(n, float_round_nearest_even)
1344 VRFI(m, float_round_down)
1345 VRFI(p, float_round_up)
1346 VRFI(z, float_round_to_zero)
1347 #undef VRFI
1348
1349 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1350 {
1351 int i;
1352
1353 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1354 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1355
1356 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1357 }
1358 }
1359
1360 #define VRLMI(name, size, element, insert) \
1361 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1362 { \
1363 int i; \
1364 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1365 uint##size##_t src1 = a->element[i]; \
1366 uint##size##_t src2 = b->element[i]; \
1367 uint##size##_t src3 = r->element[i]; \
1368 uint##size##_t begin, end, shift, mask, rot_val; \
1369 \
1370 shift = extract##size(src2, 0, 6); \
1371 end = extract##size(src2, 8, 6); \
1372 begin = extract##size(src2, 16, 6); \
1373 rot_val = rol##size(src1, shift); \
1374 mask = mask_u##size(begin, end); \
1375 if (insert) { \
1376 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1377 } else { \
1378 r->element[i] = (rot_val & mask); \
1379 } \
1380 } \
1381 }
1382
1383 VRLMI(vrldmi, 64, u64, 1);
1384 VRLMI(vrlwmi, 32, u32, 1);
1385 VRLMI(vrldnm, 64, u64, 0);
1386 VRLMI(vrlwnm, 32, u32, 0);
1387
1388 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1389 ppc_avr_t *c)
1390 {
1391 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1392 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1393 }
1394
1395 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1396 {
1397 int i;
1398
1399 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1400 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1401 }
1402 }
1403
1404 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1405 {
1406 int i;
1407
1408 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1409 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1410 }
1411 }
1412
1413 #define VEXTU_X_DO(name, size, left) \
1414 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1415 { \
1416 int index = (a & 0xf) * 8; \
1417 if (left) { \
1418 index = 128 - index - size; \
1419 } \
1420 return int128_getlo(int128_rshift(b->s128, index)) & \
1421 MAKE_64BIT_MASK(0, size); \
1422 }
1423 VEXTU_X_DO(vextublx, 8, 1)
1424 VEXTU_X_DO(vextuhlx, 16, 1)
1425 VEXTU_X_DO(vextuwlx, 32, 1)
1426 VEXTU_X_DO(vextubrx, 8, 0)
1427 VEXTU_X_DO(vextuhrx, 16, 0)
1428 VEXTU_X_DO(vextuwrx, 32, 0)
1429 #undef VEXTU_X_DO
1430
1431 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1432 {
1433 int i;
1434 unsigned int shift, bytes, size;
1435
1436 size = ARRAY_SIZE(r->u8);
1437 for (i = 0; i < size; i++) {
1438 shift = b->VsrB(i) & 0x7; /* extract shift value */
1439 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1440 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1441 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1442 }
1443 }
1444
1445 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1446 {
1447 int i;
1448 unsigned int shift, bytes;
1449
1450 /*
1451 * Use reverse order, as the destination and source registers can be the
1452 * same. Since the register is modified in place (saving a temporary),
1453 * reverse order guarantees that the computed result is not fed back.
1454 */
1455 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1456 shift = b->VsrB(i) & 0x7; /* extract shift value */
1457 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1458 /* extract adjacent bytes */
1459 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1460 }
1461 }
1462
1463 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1464 {
1465 int sh = shift & 0xf;
1466 int i;
1467 ppc_avr_t result;
1468
1469 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1470 int index = sh + i;
1471 if (index > 0xf) {
1472 result.VsrB(i) = b->VsrB(index - 0x10);
1473 } else {
1474 result.VsrB(i) = a->VsrB(index);
1475 }
1476 }
1477 *r = result;
1478 }
1479
1480 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1481 {
1482 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1483
1484 #if defined(HOST_WORDS_BIGENDIAN)
1485 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1486 memset(&r->u8[16 - sh], 0, sh);
1487 #else
1488 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1489 memset(&r->u8[0], 0, sh);
1490 #endif
1491 }
1492
1493 #if defined(HOST_WORDS_BIGENDIAN)
1494 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1495 #else
1496 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1497 #endif
1498
1499 #define VINSX(SUFFIX, TYPE) \
1500 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1501 uint64_t val, target_ulong index) \
1502 { \
1503 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1504 target_long idx = index; \
1505 \
1506 if (idx < 0 || idx > maxidx) { \
1507 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1508 qemu_log_mask(LOG_GUEST_ERROR, \
1509 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1510 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1511 } else { \
1512 TYPE src = val; \
1513 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1514 } \
1515 }
1516 VINSX(B, uint8_t)
1517 VINSX(H, uint16_t)
1518 VINSX(W, uint32_t)
1519 VINSX(D, uint64_t)
1520 #undef ELEM_ADDR
1521 #undef VINSX
1522 #if defined(HOST_WORDS_BIGENDIAN)
1523 #define VEXTDVLX(NAME, SIZE) \
1524 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1525 target_ulong index) \
1526 { \
1527 const target_long idx = index; \
1528 ppc_avr_t tmp[2] = { *a, *b }; \
1529 memset(t, 0, sizeof(*t)); \
1530 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1531 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1532 } else { \
1533 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1534 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1535 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1536 } \
1537 }
1538 #else
1539 #define VEXTDVLX(NAME, SIZE) \
1540 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1541 target_ulong index) \
1542 { \
1543 const target_long idx = index; \
1544 ppc_avr_t tmp[2] = { *b, *a }; \
1545 memset(t, 0, sizeof(*t)); \
1546 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1547 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1548 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1549 } else { \
1550 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1551 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1552 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1553 } \
1554 }
1555 #endif
1556 VEXTDVLX(VEXTDUBVLX, 1)
1557 VEXTDVLX(VEXTDUHVLX, 2)
1558 VEXTDVLX(VEXTDUWVLX, 4)
1559 VEXTDVLX(VEXTDDVLX, 8)
1560 #undef VEXTDVLX
1561 #if defined(HOST_WORDS_BIGENDIAN)
1562 #define VEXTRACT(suffix, element) \
1563 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1564 { \
1565 uint32_t es = sizeof(r->element[0]); \
1566 memmove(&r->u8[8 - es], &b->u8[index], es); \
1567 memset(&r->u8[8], 0, 8); \
1568 memset(&r->u8[0], 0, 8 - es); \
1569 }
1570 #else
1571 #define VEXTRACT(suffix, element) \
1572 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1573 { \
1574 uint32_t es = sizeof(r->element[0]); \
1575 uint32_t s = (16 - index) - es; \
1576 memmove(&r->u8[8], &b->u8[s], es); \
1577 memset(&r->u8[0], 0, 8); \
1578 memset(&r->u8[8 + es], 0, 8 - es); \
1579 }
1580 #endif
1581 VEXTRACT(ub, u8)
1582 VEXTRACT(uh, u16)
1583 VEXTRACT(uw, u32)
1584 VEXTRACT(d, u64)
1585 #undef VEXTRACT
1586
1587 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1588 ppc_vsr_t *xb, uint32_t index)
1589 {
1590 ppc_vsr_t t = { };
1591 size_t es = sizeof(uint32_t);
1592 uint32_t ext_index;
1593 int i;
1594
1595 ext_index = index;
1596 for (i = 0; i < es; i++, ext_index++) {
1597 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1598 }
1599
1600 *xt = t;
1601 }
1602
1603 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1604 ppc_vsr_t *xb, uint32_t index)
1605 {
1606 ppc_vsr_t t = *xt;
1607 size_t es = sizeof(uint32_t);
1608 int ins_index, i = 0;
1609
1610 ins_index = index;
1611 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1612 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1613 }
1614
1615 *xt = t;
1616 }
1617
1618 #define XXBLEND(name, sz) \
1619 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1620 ppc_avr_t *c, uint32_t desc) \
1621 { \
1622 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1623 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1624 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1625 } \
1626 }
1627 XXBLEND(B, 8)
1628 XXBLEND(H, 16)
1629 XXBLEND(W, 32)
1630 XXBLEND(D, 64)
1631 #undef XXBLEND
1632
1633 #define VNEG(name, element) \
1634 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1635 { \
1636 int i; \
1637 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1638 r->element[i] = -b->element[i]; \
1639 } \
1640 }
1641 VNEG(vnegw, s32)
1642 VNEG(vnegd, s64)
1643 #undef VNEG
1644
1645 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1646 {
1647 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1648
1649 #if defined(HOST_WORDS_BIGENDIAN)
1650 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1651 memset(&r->u8[0], 0, sh);
1652 #else
1653 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1654 memset(&r->u8[16 - sh], 0, sh);
1655 #endif
1656 }
1657
1658 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1659 {
1660 int i;
1661
1662 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1663 r->u32[i] = a->u32[i] >= b->u32[i];
1664 }
1665 }
1666
1667 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1668 {
1669 int64_t t;
1670 int i, upper;
1671 ppc_avr_t result;
1672 int sat = 0;
1673
1674 upper = ARRAY_SIZE(r->s32) - 1;
1675 t = (int64_t)b->VsrSW(upper);
1676 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1677 t += a->VsrSW(i);
1678 result.VsrSW(i) = 0;
1679 }
1680 result.VsrSW(upper) = cvtsdsw(t, &sat);
1681 *r = result;
1682
1683 if (sat) {
1684 set_vscr_sat(env);
1685 }
1686 }
1687
1688 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1689 {
1690 int i, j, upper;
1691 ppc_avr_t result;
1692 int sat = 0;
1693
1694 upper = 1;
1695 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1696 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1697
1698 result.VsrD(i) = 0;
1699 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1700 t += a->VsrSW(2 * i + j);
1701 }
1702 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1703 }
1704
1705 *r = result;
1706 if (sat) {
1707 set_vscr_sat(env);
1708 }
1709 }
1710
1711 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1712 {
1713 int i, j;
1714 int sat = 0;
1715
1716 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1717 int64_t t = (int64_t)b->s32[i];
1718
1719 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1720 t += a->s8[4 * i + j];
1721 }
1722 r->s32[i] = cvtsdsw(t, &sat);
1723 }
1724
1725 if (sat) {
1726 set_vscr_sat(env);
1727 }
1728 }
1729
1730 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1731 {
1732 int sat = 0;
1733 int i;
1734
1735 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1736 int64_t t = (int64_t)b->s32[i];
1737
1738 t += a->s16[2 * i] + a->s16[2 * i + 1];
1739 r->s32[i] = cvtsdsw(t, &sat);
1740 }
1741
1742 if (sat) {
1743 set_vscr_sat(env);
1744 }
1745 }
1746
1747 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1748 {
1749 int i, j;
1750 int sat = 0;
1751
1752 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1753 uint64_t t = (uint64_t)b->u32[i];
1754
1755 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1756 t += a->u8[4 * i + j];
1757 }
1758 r->u32[i] = cvtuduw(t, &sat);
1759 }
1760
1761 if (sat) {
1762 set_vscr_sat(env);
1763 }
1764 }
1765
1766 #if defined(HOST_WORDS_BIGENDIAN)
1767 #define UPKHI 1
1768 #define UPKLO 0
1769 #else
1770 #define UPKHI 0
1771 #define UPKLO 1
1772 #endif
1773 #define VUPKPX(suffix, hi) \
1774 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1775 { \
1776 int i; \
1777 ppc_avr_t result; \
1778 \
1779 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1780 uint16_t e = b->u16[hi ? i : i + 4]; \
1781 uint8_t a = (e >> 15) ? 0xff : 0; \
1782 uint8_t r = (e >> 10) & 0x1f; \
1783 uint8_t g = (e >> 5) & 0x1f; \
1784 uint8_t b = e & 0x1f; \
1785 \
1786 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1787 } \
1788 *r = result; \
1789 }
1790 VUPKPX(lpx, UPKLO)
1791 VUPKPX(hpx, UPKHI)
1792 #undef VUPKPX
1793
1794 #define VUPK(suffix, unpacked, packee, hi) \
1795 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1796 { \
1797 int i; \
1798 ppc_avr_t result; \
1799 \
1800 if (hi) { \
1801 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1802 result.unpacked[i] = b->packee[i]; \
1803 } \
1804 } else { \
1805 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1806 i++) { \
1807 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1808 } \
1809 } \
1810 *r = result; \
1811 }
1812 VUPK(hsb, s16, s8, UPKHI)
1813 VUPK(hsh, s32, s16, UPKHI)
1814 VUPK(hsw, s64, s32, UPKHI)
1815 VUPK(lsb, s16, s8, UPKLO)
1816 VUPK(lsh, s32, s16, UPKLO)
1817 VUPK(lsw, s64, s32, UPKLO)
1818 #undef VUPK
1819 #undef UPKHI
1820 #undef UPKLO
1821
1822 #define VGENERIC_DO(name, element) \
1823 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1824 { \
1825 int i; \
1826 \
1827 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1828 r->element[i] = name(b->element[i]); \
1829 } \
1830 }
1831
1832 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1833 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1834
1835 VGENERIC_DO(clzb, u8)
1836 VGENERIC_DO(clzh, u16)
1837
1838 #undef clzb
1839 #undef clzh
1840
1841 #define ctzb(v) ((v) ? ctz32(v) : 8)
1842 #define ctzh(v) ((v) ? ctz32(v) : 16)
1843 #define ctzw(v) ctz32((v))
1844 #define ctzd(v) ctz64((v))
1845
1846 VGENERIC_DO(ctzb, u8)
1847 VGENERIC_DO(ctzh, u16)
1848 VGENERIC_DO(ctzw, u32)
1849 VGENERIC_DO(ctzd, u64)
1850
1851 #undef ctzb
1852 #undef ctzh
1853 #undef ctzw
1854 #undef ctzd
1855
1856 #define popcntb(v) ctpop8(v)
1857 #define popcnth(v) ctpop16(v)
1858 #define popcntw(v) ctpop32(v)
1859 #define popcntd(v) ctpop64(v)
1860
1861 VGENERIC_DO(popcntb, u8)
1862 VGENERIC_DO(popcnth, u16)
1863 VGENERIC_DO(popcntw, u32)
1864 VGENERIC_DO(popcntd, u64)
1865
1866 #undef popcntb
1867 #undef popcnth
1868 #undef popcntw
1869 #undef popcntd
1870
1871 #undef VGENERIC_DO
1872
1873 #if defined(HOST_WORDS_BIGENDIAN)
1874 #define QW_ONE { .u64 = { 0, 1 } }
1875 #else
1876 #define QW_ONE { .u64 = { 1, 0 } }
1877 #endif
1878
1879 #ifndef CONFIG_INT128
1880
1881 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1882 {
1883 t->u64[0] = ~a.u64[0];
1884 t->u64[1] = ~a.u64[1];
1885 }
1886
1887 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1888 {
1889 if (a.VsrD(0) < b.VsrD(0)) {
1890 return -1;
1891 } else if (a.VsrD(0) > b.VsrD(0)) {
1892 return 1;
1893 } else if (a.VsrD(1) < b.VsrD(1)) {
1894 return -1;
1895 } else if (a.VsrD(1) > b.VsrD(1)) {
1896 return 1;
1897 } else {
1898 return 0;
1899 }
1900 }
1901
1902 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1903 {
1904 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1905 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1906 (~a.VsrD(1) < b.VsrD(1));
1907 }
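/*
 * The carry out of the low doubleword is detected with ~a < b, e.g.
 * (illustrative values) a.VsrD(1) = 0xffffffffffffffff and b.VsrD(1) = 1:
 * ~a.VsrD(1) = 0 < 1, so 1 is propagated into the high doubleword.
 */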
1908
1909 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1910 {
1911 ppc_avr_t not_a;
1912 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1913 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1914 (~a.VsrD(1) < b.VsrD(1));
1915 avr_qw_not(&not_a, a);
1916 return avr_qw_cmpu(not_a, b) < 0;
1917 }
1918
1919 #endif
1920
1921 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1922 {
1923 #ifdef CONFIG_INT128
1924 r->u128 = a->u128 + b->u128;
1925 #else
1926 avr_qw_add(r, *a, *b);
1927 #endif
1928 }
1929
1930 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1931 {
1932 #ifdef CONFIG_INT128
1933 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1934 #else
1935
1936 if (c->VsrD(1) & 1) {
1937 ppc_avr_t tmp;
1938
1939 tmp.VsrD(0) = 0;
1940 tmp.VsrD(1) = c->VsrD(1) & 1;
1941 avr_qw_add(&tmp, *a, tmp);
1942 avr_qw_add(r, tmp, *b);
1943 } else {
1944 avr_qw_add(r, *a, *b);
1945 }
1946 #endif
1947 }
1948
1949 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1950 {
1951 #ifdef CONFIG_INT128
1952 r->u128 = (~a->u128 < b->u128);
1953 #else
1954 ppc_avr_t not_a;
1955
1956 avr_qw_not(&not_a, *a);
1957
1958 r->VsrD(0) = 0;
1959 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1960 #endif
1961 }
1962
1963 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1964 {
1965 #ifdef CONFIG_INT128
1966 int carry_out = (~a->u128 < b->u128);
1967 if (!carry_out && (c->u128 & 1)) {
1968 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1969 ((a->u128 != 0) || (b->u128 != 0));
1970 }
1971 r->u128 = carry_out;
1972 #else
1973
1974 int carry_in = c->VsrD(1) & 1;
1975 int carry_out = 0;
1976 ppc_avr_t tmp;
1977
1978 carry_out = avr_qw_addc(&tmp, *a, *b);
1979
1980 if (!carry_out && carry_in) {
1981 ppc_avr_t one = QW_ONE;
1982 carry_out = avr_qw_addc(&tmp, tmp, one);
1983 }
1984 r->VsrD(0) = 0;
1985 r->VsrD(1) = carry_out;
1986 #endif
1987 }
1988
1989 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1990 {
1991 #ifdef CONFIG_INT128
1992 r->u128 = a->u128 - b->u128;
1993 #else
1994 ppc_avr_t tmp;
1995 ppc_avr_t one = QW_ONE;
1996
1997 avr_qw_not(&tmp, *b);
1998 avr_qw_add(&tmp, *a, tmp);
1999 avr_qw_add(r, tmp, one);
2000 #endif
2001 }
2002
2003 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2004 {
2005 #ifdef CONFIG_INT128
2006 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2007 #else
2008 ppc_avr_t tmp, sum;
2009
2010 avr_qw_not(&tmp, *b);
2011 avr_qw_add(&sum, *a, tmp);
2012
2013 tmp.VsrD(0) = 0;
2014 tmp.VsrD(1) = c->VsrD(1) & 1;
2015 avr_qw_add(r, sum, tmp);
2016 #endif
2017 }
2018
2019 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2020 {
2021 #ifdef CONFIG_INT128
2022 r->u128 = (~a->u128 < ~b->u128) ||
2023 (a->u128 + ~b->u128 == (__uint128_t)-1);
2024 #else
2025 int carry = (avr_qw_cmpu(*a, *b) > 0);
2026 if (!carry) {
2027 ppc_avr_t tmp;
2028 avr_qw_not(&tmp, *b);
2029 avr_qw_add(&tmp, *a, tmp);
2030 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2031 }
2032 r->VsrD(0) = 0;
2033 r->VsrD(1) = carry;
2034 #endif
2035 }
2036
2037 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2038 {
2039 #ifdef CONFIG_INT128
2040 r->u128 =
2041 (~a->u128 < ~b->u128) ||
2042 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2043 #else
2044 int carry_in = c->VsrD(1) & 1;
2045 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2046 if (!carry_out && carry_in) {
2047 ppc_avr_t tmp;
2048 avr_qw_not(&tmp, *b);
2049 avr_qw_add(&tmp, *a, tmp);
2050 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2051 }
2052
2053 r->VsrD(0) = 0;
2054 r->VsrD(1) = carry_out;
2055 #endif
2056 }
2057
2058 #define BCD_PLUS_PREF_1 0xC
2059 #define BCD_PLUS_PREF_2 0xF
2060 #define BCD_PLUS_ALT_1 0xA
2061 #define BCD_NEG_PREF 0xD
2062 #define BCD_NEG_ALT 0xB
2063 #define BCD_PLUS_ALT_2 0xE
2064 #define NATIONAL_PLUS 0x2B
2065 #define NATIONAL_NEG 0x2D
2066
2067 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2068
2069 static int bcd_get_sgn(ppc_avr_t *bcd)
2070 {
2071 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2072 case BCD_PLUS_PREF_1:
2073 case BCD_PLUS_PREF_2:
2074 case BCD_PLUS_ALT_1:
2075 case BCD_PLUS_ALT_2:
2076 {
2077 return 1;
2078 }
2079
2080 case BCD_NEG_PREF:
2081 case BCD_NEG_ALT:
2082 {
2083 return -1;
2084 }
2085
2086 default:
2087 {
2088 return 0;
2089 }
2090 }
2091 }
2092
2093 static int bcd_preferred_sgn(int sgn, int ps)
2094 {
2095 if (sgn >= 0) {
2096 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2097 } else {
2098 return BCD_NEG_PREF;
2099 }
2100 }
2101
2102 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2103 {
2104 uint8_t result;
2105 if (n & 1) {
2106 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2107 } else {
2108 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2109 }
2110
2111 if (unlikely(result > 9)) {
2112 *invalid = true;
2113 }
2114 return result;
2115 }
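/*
 * Worked example (illustrative value): for a packed BCD operand whose two
 * least significant bytes are 0x12 0x3c, the sign nibble is 0xc (positive),
 * bcd_get_digit(bcd, 1, ...) == 3, digit 2 == 2 and digit 3 == 1.
 */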
2116
2117 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2118 {
2119 if (n & 1) {
2120 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2121 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2122 } else {
2123 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2124 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2125 }
2126 }
2127
2128 static bool bcd_is_valid(ppc_avr_t *bcd)
2129 {
2130 int i;
2131 int invalid = 0;
2132
2133 if (bcd_get_sgn(bcd) == 0) {
2134 return false;
2135 }
2136
2137 for (i = 1; i < 32; i++) {
2138 bcd_get_digit(bcd, i, &invalid);
2139 if (unlikely(invalid)) {
2140 return false;
2141 }
2142 }
2143 return true;
2144 }
2145
2146 static int bcd_cmp_zero(ppc_avr_t *bcd)
2147 {
2148 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2149 return CRF_EQ;
2150 } else {
2151 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2152 }
2153 }
2154
2155 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2156 {
2157 return reg->VsrH(7 - n);
2158 }
2159
2160 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2161 {
2162 reg->VsrH(7 - n) = val;
2163 }
2164
2165 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2166 {
2167 int i;
2168 int invalid = 0;
2169 for (i = 31; i > 0; i--) {
2170 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2171 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2172 if (unlikely(invalid)) {
2173 return 0; /* doesn't matter */
2174 } else if (dig_a > dig_b) {
2175 return 1;
2176 } else if (dig_a < dig_b) {
2177 return -1;
2178 }
2179 }
2180
2181 return 0;
2182 }
2183
2184 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2185 int *overflow)
2186 {
2187 int carry = 0;
2188 int i;
2189 int is_zero = 1;
2190
2191 for (i = 1; i <= 31; i++) {
2192 uint8_t digit = bcd_get_digit(a, i, invalid) +
2193 bcd_get_digit(b, i, invalid) + carry;
2194 is_zero &= (digit == 0);
2195 if (digit > 9) {
2196 carry = 1;
2197 digit -= 10;
2198 } else {
2199 carry = 0;
2200 }
2201
2202 bcd_put_digit(t, digit, i);
2203 }
2204
2205 *overflow = carry;
2206 return is_zero;
2207 }
2208
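/*
 * Subtract the smaller magnitude from the larger one. Callers compare
 * magnitudes with bcd_cmp_mag() first so that |a| >= |b| on entry.
 */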
2209 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2210 int *overflow)
2211 {
2212 int carry = 0;
2213 int i;
2214
2215 for (i = 1; i <= 31; i++) {
2216 uint8_t digit = bcd_get_digit(a, i, invalid) -
2217 bcd_get_digit(b, i, invalid) + carry;
2218 if (digit & 0x80) {
2219 carry = -1;
2220 digit += 10;
2221 } else {
2222 carry = 0;
2223 }
2224
2225 bcd_put_digit(t, digit, i);
2226 }
2227
2228 *overflow = carry;
2229 }
2230
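/*
 * bcdadd returns a CR field value: LT/GT reflect the sign of the
 * result, EQ flags a zero result and SO flags invalid operands or
 * overflow of the 31-digit result.
 */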
2231 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2232 {
2234 int sgna = bcd_get_sgn(a);
2235 int sgnb = bcd_get_sgn(b);
2236 int invalid = (sgna == 0) || (sgnb == 0);
2237 int overflow = 0;
2238 int zero = 0;
2239 uint32_t cr = 0;
2240 ppc_avr_t result = { .u64 = { 0, 0 } };
2241
2242 if (!invalid) {
2243 if (sgna == sgnb) {
2244 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2245 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2246 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2247 } else {
2248 int magnitude = bcd_cmp_mag(a, b);
2249 if (magnitude > 0) {
2250 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2251 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2252 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2253 } else if (magnitude < 0) {
2254 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2255 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2256 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2257 } else {
2258 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2259 cr = CRF_EQ;
2260 }
2261 }
2262 }
2263
2264 if (unlikely(invalid)) {
2265 result.VsrD(0) = result.VsrD(1) = -1;
2266 cr = CRF_SO;
2267 } else if (overflow) {
2268 cr |= CRF_SO;
2269 } else if (zero) {
2270 cr |= CRF_EQ;
2271 }
2272
2273 *r = result;
2274
2275 return cr;
2276 }
2277
2278 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2279 {
2280 ppc_avr_t bcopy = *b;
2281 int sgnb = bcd_get_sgn(b);
2282 if (sgnb < 0) {
2283 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2284 } else if (sgnb > 0) {
2285 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2286 }
2287 /* else invalid ... defer to bcdadd code for proper handling */
2288
2289 return helper_bcdadd(r, a, &bcopy, ps);
2290 }
2291
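/*
 * National decimal format: eight 16-bit characters. Character 0, the
 * least significant halfword, is the sign ('+' = 0x002B, '-' = 0x002D);
 * characters 1..7 are the digits '0'..'9' (0x0030..0x0039) in order of
 * increasing significance.
 */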
2292 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2293 {
2294 int i;
2295 int cr = 0;
2296 uint16_t national = 0;
2297 uint16_t sgnb = get_national_digit(b, 0);
2298 ppc_avr_t ret = { .u64 = { 0, 0 } };
2299 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2300
2301 for (i = 1; i < 8; i++) {
2302 national = get_national_digit(b, i);
2303 if (unlikely(national < 0x30 || national > 0x39)) {
2304 invalid = 1;
2305 break;
2306 }
2307
2308 bcd_put_digit(&ret, national & 0xf, i);
2309 }
2310
2311 if (sgnb == NATIONAL_PLUS) {
2312 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2313 } else {
2314 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2315 }
2316
2317 cr = bcd_cmp_zero(&ret);
2318
2319 if (unlikely(invalid)) {
2320 cr = CRF_SO;
2321 }
2322
2323 *r = ret;
2324
2325 return cr;
2326 }
2327
2328 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2329 {
2330 int i;
2331 int cr = 0;
2332 int sgnb = bcd_get_sgn(b);
2333 int invalid = (sgnb == 0);
2334 ppc_avr_t ret = { .u64 = { 0, 0 } };
2335
2336 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2337
2338 for (i = 1; i < 8; i++) {
2339 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2340
2341 if (unlikely(invalid)) {
2342 break;
2343 }
2344 }
2345 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2346
2347 cr = bcd_cmp_zero(b);
2348
2349 if (ox_flag) {
2350 cr |= CRF_SO;
2351 }
2352
2353 if (unlikely(invalid)) {
2354 cr = CRF_SO;
2355 }
2356
2357 *r = ret;
2358
2359 return cr;
2360 }
2361
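/*
 * Zoned decimal format: sixteen bytes, each carrying a zone in the high
 * nibble and a digit in the low nibble. The zone of the least
 * significant byte encodes the sign; every other zone must match the
 * preferred zone (0x3 when ps = 0, 0xF when ps = 1).
 */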
2362 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2363 {
2364 int i;
2365 int cr = 0;
2366 int invalid = 0;
2367 int zone_digit = 0;
2368 int zone_lead = ps ? 0xF : 0x3;
2369 int digit = 0;
2370 ppc_avr_t ret = { .u64 = { 0, 0 } };
2371 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2372
2373 if (unlikely((sgnb < 0xA) && ps)) {
2374 invalid = 1;
2375 }
2376
2377 for (i = 0; i < 16; i++) {
2378 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2379 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2380 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2381 invalid = 1;
2382 break;
2383 }
2384
2385 bcd_put_digit(&ret, digit, i + 1);
2386 }
2387
2388 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2389 (!ps && (sgnb & 0x4))) {
2390 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2391 } else {
2392 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2393 }
2394
2395 cr = bcd_cmp_zero(&ret);
2396
2397 if (unlikely(invalid)) {
2398 cr = CRF_SO;
2399 }
2400
2401 *r = ret;
2402
2403 return cr;
2404 }
2405
2406 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2407 {
2408 int i;
2409 int cr = 0;
2410 uint8_t digit = 0;
2411 int sgnb = bcd_get_sgn(b);
2412 int zone_lead = (ps) ? 0xF0 : 0x30;
2413 int invalid = (sgnb == 0);
2414 ppc_avr_t ret = { .u64 = { 0, 0 } };
2415
2416 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2417
2418 for (i = 0; i < 16; i++) {
2419 digit = bcd_get_digit(b, i + 1, &invalid);
2420
2421 if (unlikely(invalid)) {
2422 break;
2423 }
2424
2425 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2426 }
2427
2428 if (ps) {
2429 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2430 } else {
2431 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2432 }
2433
2434 cr = bcd_cmp_zero(b);
2435
2436 if (ox_flag) {
2437 cr |= CRF_SO;
2438 }
2439
2440 if (unlikely(invalid)) {
2441 cr = CRF_SO;
2442 }
2443
2444 *r = ret;
2445
2446 return cr;
2447 }
2448
2449 /**
2450 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2451 *
2452 * Returns:
2453 * > 0 if ahi|alo > bhi|blo,
2454 * 0 if ahi|alo == bhi|blo,
2455 * < 0 if ahi|alo < bhi|blo
2456 */
2457 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2458 uint64_t blo, uint64_t bhi)
2459 {
2460 return (ahi == bhi) ?
2461 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2462 (ahi > bhi ? 1 : -1);
2463 }
2464
2465 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2466 {
2467 int i;
2468 int cr;
2469 uint64_t lo_value;
2470 uint64_t hi_value;
2471 uint64_t rem;
2472 ppc_avr_t ret = { .u64 = { 0, 0 } };
2473
2474 if (b->VsrSD(0) < 0) {
2475 lo_value = -b->VsrSD(1);
2476 hi_value = ~b->VsrD(0) + !lo_value;
2477 bcd_put_digit(&ret, 0xD, 0);
2478
2479 cr = CRF_LT;
2480 } else {
2481 lo_value = b->VsrD(1);
2482 hi_value = b->VsrD(0);
2483 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2484
2485 if (hi_value == 0 && lo_value == 0) {
2486 cr = CRF_EQ;
2487 } else {
2488 cr = CRF_GT;
2489 }
2490 }
2491
2492 /*
2493 * Check src limits: abs(src) <= 10^31 - 1
2494 *
2495 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2496 */
2497 if (ucmp128(lo_value, hi_value,
2498 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2499 cr |= CRF_SO;
2500
2501 /*
2502 * According to the ISA, if src wouldn't fit in the destination
2503 * register, the result is undefined.
2504 * In that case, we leave r unchanged.
2505 */
2506 } else {
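/*
 * Split the 128-bit value: the remainder of dividing by 10^15
 * provides digits 1..15, while the quotient left in lo_value (at
 * most 16 digits for an in-range source) provides digits 16..31.
 */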
2507 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2508
2509 for (i = 1; i < 16; rem /= 10, i++) {
2510 bcd_put_digit(&ret, rem % 10, i);
2511 }
2512
2513 for (; i < 32; lo_value /= 10, i++) {
2514 bcd_put_digit(&ret, lo_value % 10, i);
2515 }
2516
2517 *r = ret;
2518 }
2519
2520 return cr;
2521 }
2522
2523 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2524 {
2525 uint8_t i;
2526 int cr;
2527 uint64_t carry;
2528 uint64_t unused;
2529 uint64_t lo_value;
2530 uint64_t hi_value = 0;
2531 int sgnb = bcd_get_sgn(b);
2532 int invalid = (sgnb == 0);
2533
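/*
 * Horner evaluation: starting from the most significant digit (31),
 * multiply the 128-bit accumulator by ten and add the next digit.
 */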
2534 lo_value = bcd_get_digit(b, 31, &invalid);
2535 for (i = 30; i > 0; i--) {
2536 mulu64(&lo_value, &carry, lo_value, 10ULL);
2537 mulu64(&hi_value, &unused, hi_value, 10ULL);
2538 lo_value += bcd_get_digit(b, i, &invalid);
2539 hi_value += carry;
2540
2541 if (unlikely(invalid)) {
2542 break;
2543 }
2544 }
2545
2546 if (sgnb == -1) {
2547 r->VsrSD(1) = -lo_value;
2548 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2549 } else {
2550 r->VsrSD(1) = lo_value;
2551 r->VsrSD(0) = hi_value;
2552 }
2553
2554 cr = bcd_cmp_zero(b);
2555
2556 if (unlikely(invalid)) {
2557 cr = CRF_SO;
2558 }
2559
2560 return cr;
2561 }
2562
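/* bcdcpsgn: the result takes the magnitude of a and the sign nibble of b. */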
2563 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2564 {
2565 int i;
2566 int invalid = 0;
2567
2568 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2569 return CRF_SO;
2570 }
2571
2572 *r = *a;
2573 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2574
2575 for (i = 1; i < 32; i++) {
2576 bcd_get_digit(a, i, &invalid);
2577 bcd_get_digit(b, i, &invalid);
2578 if (unlikely(invalid)) {
2579 return CRF_SO;
2580 }
2581 }
2582
2583 return bcd_cmp_zero(r);
2584 }
2585
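/*
 * bcdsetsgn: rewrite the sign nibble of b with the preferred code for
 * its current sign, returning SO for invalid input.
 */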
2586 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2587 {
2588 int sgnb = bcd_get_sgn(b);
2589
2590 *r = *b;
2591 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2592
2593 if (bcd_is_valid(b) == false) {
2594 return CRF_SO;
2595 }
2596
2597 return bcd_cmp_zero(r);
2598 }
2599
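/*
 * bcds: decimal shift. The signed shift count taken from a moves the
 * magnitude of b left (positive) or right (negative) by whole digits;
 * the preferred sign code is re-inserted afterwards and SO reports
 * nonzero digits lost off the top on a left shift.
 */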
2600 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2601 {
2602 int cr;
2603 int i = a->VsrSB(7);
2604 bool ox_flag = false;
2605 int sgnb = bcd_get_sgn(b);
2606 ppc_avr_t ret = *b;
2607 ret.VsrD(1) &= ~0xf;
2608
2609 if (bcd_is_valid(b) == false) {
2610 return CRF_SO;
2611 }
2612
2613 if (unlikely(i > 31)) {
2614 i = 31;
2615 } else if (unlikely(i < -31)) {
2616 i = -31;
2617 }
2618
2619 if (i > 0) {
2620 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2621 } else {
2622 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2623 }
2624 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2625
2626 *r = ret;
2627
2628 cr = bcd_cmp_zero(r);
2629 if (ox_flag) {
2630 cr |= CRF_SO;
2631 }
2632
2633 return cr;
2634 }
2635
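/*
 * bcdus: decimal unsigned shift. All 32 nibbles are treated as digits
 * (there is no sign nibble); a count of 32 or more clears the result
 * and sets SO, a count of -32 or less just clears it.
 */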
2636 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2637 {
2638 int cr;
2639 int i;
2640 int invalid = 0;
2641 bool ox_flag = false;
2642 ppc_avr_t ret = *b;
2643
2644 for (i = 0; i < 32; i++) {
2645 bcd_get_digit(b, i, &invalid);
2646
2647 if (unlikely(invalid)) {
2648 return CRF_SO;
2649 }
2650 }
2651
2652 i = a->VsrSB(7);
2653 if (i >= 32) {
2654 ox_flag = true;
2655 ret.VsrD(1) = ret.VsrD(0) = 0;
2656 } else if (i <= -32) {
2657 ret.VsrD(1) = ret.VsrD(0) = 0;
2658 } else if (i > 0) {
2659 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2660 } else {
2661 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2662 }
2663 *r = ret;
2664
2665 cr = bcd_cmp_zero(r);
2666 if (ox_flag) {
2667 cr |= CRF_SO;
2668 }
2669
2670 return cr;
2671 }
2672
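/*
 * bcdsr: decimal shift and round. Like bcds, but a right shift rounds
 * the magnitude up when the most significant discarded digit is 5 or
 * greater.
 */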
2673 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2674 {
2675 int cr;
2676 int unused = 0;
2677 int invalid = 0;
2678 bool ox_flag = false;
2679 int sgnb = bcd_get_sgn(b);
2680 ppc_avr_t ret = *b;
2681 ret.VsrD(1) &= ~0xf;
2682
2683 int i = a->VsrSB(7);
2684 ppc_avr_t bcd_one;
2685
2686 bcd_one.VsrD(0) = 0;
2687 bcd_one.VsrD(1) = 0x10;
2688
2689 if (bcd_is_valid(b) == false) {
2690 return CRF_SO;
2691 }
2692
2693 if (unlikely(i > 31)) {
2694 i = 31;
2695 } else if (unlikely(i < -31)) {
2696 i = -31;
2697 }
2698
2699 if (i > 0) {
2700 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2701 } else {
2702 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2703
2704 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2705 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2706 }
2707 }
2708 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2709
2710 cr = bcd_cmp_zero(&ret);
2711 if (ox_flag) {
2712 cr |= CRF_SO;
2713 }
2714 *r = ret;
2715
2716 return cr;
2717 }
2718
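/*
 * bcdtrunc: truncate the magnitude of b to the digit count taken from
 * a, preserving the sign and setting SO if any discarded digit was
 * nonzero.
 */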
2719 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2720 {
2721 uint64_t mask;
2722 uint32_t ox_flag = 0;
2723 int i = a->VsrSH(3) + 1;
2724 ppc_avr_t ret = *b;
2725
2726 if (bcd_is_valid(b) == false) {
2727 return CRF_SO;
2728 }
2729
2730 if (i > 16 && i < 32) {
2731 mask = (uint64_t)-1 >> (128 - i * 4);
2732 if (ret.VsrD(0) & ~mask) {
2733 ox_flag = CRF_SO;
2734 }
2735
2736 ret.VsrD(0) &= mask;
2737 } else if (i >= 0 && i <= 16) {
2738 mask = (uint64_t)-1 >> (64 - i * 4);
2739 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2740 ox_flag = CRF_SO;
2741 }
2742
2743 ret.VsrD(1) &= mask;
2744 ret.VsrD(0) = 0;
2745 }
2746 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2747 *r = ret;
2748
2749 return bcd_cmp_zero(&ret) | ox_flag;
2750 }
2751
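/*
 * bcdutrunc: unsigned variant of bcdtrunc. All 32 nibbles are digits,
 * the count comes straight from a, and the CR result is EQ for zero or
 * GT otherwise, with SO for discarded nonzero digits.
 */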
2752 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2753 {
2754 int i;
2755 uint64_t mask;
2756 uint32_t ox_flag = 0;
2757 int invalid = 0;
2758 ppc_avr_t ret = *b;
2759
2760 for (i = 0; i < 32; i++) {
2761 bcd_get_digit(b, i, &invalid);
2762
2763 if (unlikely(invalid)) {
2764 return CRF_SO;
2765 }
2766 }
2767
2768 i = a->VsrSH(3);
2769 if (i > 16 && i < 33) {
2770 mask = (uint64_t)-1 >> (128 - i * 4);
2771 if (ret.VsrD(0) & ~mask) {
2772 ox_flag = CRF_SO;
2773 }
2774
2775 ret.VsrD(0) &= mask;
2776 } else if (i > 0 && i <= 16) {
2777 mask = (uint64_t)-1 >> (64 - i * 4);
2778 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2779 ox_flag = CRF_SO;
2780 }
2781
2782 ret.VsrD(1) &= mask;
2783 ret.VsrD(0) = 0;
2784 } else if (i == 0) {
2785 if (ret.VsrD(0) || ret.VsrD(1)) {
2786 ox_flag = CRF_SO;
2787 }
2788 ret.VsrD(0) = ret.VsrD(1) = 0;
2789 }
2790
2791 *r = ret;
2792 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2793 return ox_flag | CRF_EQ;
2794 }
2795
2796 return ox_flag | CRF_GT;
2797 }
2798
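/*
 * AES acceleration helpers. vsbox applies the AES S-box to every byte;
 * vcipher performs one full encryption round (SubBytes, ShiftRows,
 * MixColumns and the round-key XOR with b) via the combined AES_Te
 * lookup tables; vcipherlast omits MixColumns for the final round. The
 * vncipher variants implement the corresponding inverse-cipher rounds.
 */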
2799 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2800 {
2801 int i;
2802 VECTOR_FOR_INORDER_I(i, u8) {
2803 r->u8[i] = AES_sbox[a->u8[i]];
2804 }
2805 }
2806
2807 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2808 {
2809 ppc_avr_t result;
2810 int i;
2811
2812 VECTOR_FOR_INORDER_I(i, u32) {
2813 result.VsrW(i) = b->VsrW(i) ^
2814 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2815 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2816 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2817 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2818 }
2819 *r = result;
2820 }
2821
2822 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2823 {
2824 ppc_avr_t result;
2825 int i;
2826
2827 VECTOR_FOR_INORDER_I(i, u8) {
2828 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2829 }
2830 *r = result;
2831 }
2832
2833 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2834 {
2835 /* This differs from what is written in ISA V2.07. The RTL is
2836  * incorrect and will be fixed in V2.07B. */
2837 int i;
2838 ppc_avr_t tmp;
2839
2840 VECTOR_FOR_INORDER_I(i, u8) {
2841 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2842 }
2843
2844 VECTOR_FOR_INORDER_I(i, u32) {
2845 r->VsrW(i) =
2846 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2847 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2848 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2849 AES_imc[tmp.VsrB(4 * i + 3)][3];
2850 }
2851 }
2852
2853 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2854 {
2855 ppc_avr_t result;
2856 int i;
2857
2858 VECTOR_FOR_INORDER_I(i, u8) {
2859 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2860 }
2861 *r = result;
2862 }
2863
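/*
 * SHA-2 sigma helpers. Bit 4 of st_six selects the "big" Sigma0/Sigma1
 * functions instead of the "small" sigma0/sigma1, and the low four bits
 * select, per element, which function of the pair is applied. The word
 * form implements the SHA-256 functions, the doubleword form below the
 * SHA-512 ones.
 */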
2864 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2865 {
2866 int st = (st_six & 0x10) != 0;
2867 int six = st_six & 0xF;
2868 int i;
2869
2870 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2871 if (st == 0) {
2872 if ((six & (0x8 >> i)) == 0) {
2873 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2874 ror32(a->VsrW(i), 18) ^
2875 (a->VsrW(i) >> 3);
2876 } else { /* six.bit[i] == 1 */
2877 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2878 ror32(a->VsrW(i), 19) ^
2879 (a->VsrW(i) >> 10);
2880 }
2881 } else { /* st == 1 */
2882 if ((six & (0x8 >> i)) == 0) {
2883 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2884 ror32(a->VsrW(i), 13) ^
2885 ror32(a->VsrW(i), 22);
2886 } else { /* six.bit[i] == 1 */
2887 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2888 ror32(a->VsrW(i), 11) ^
2889 ror32(a->VsrW(i), 25);
2890 }
2891 }
2892 }
2893 }
2894
2895 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2896 {
2897 int st = (st_six & 0x10) != 0;
2898 int six = st_six & 0xF;
2899 int i;
2900
2901 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2902 if (st == 0) {
2903 if ((six & (0x8 >> (2 * i))) == 0) {
2904 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2905 ror64(a->VsrD(i), 8) ^
2906 (a->VsrD(i) >> 7);
2907 } else { /* six.bit[2*i] == 1 */
2908 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2909 ror64(a->VsrD(i), 61) ^
2910 (a->VsrD(i) >> 6);
2911 }
2912 } else { /* st == 1 */
2913 if ((six & (0x8 >> (2 * i))) == 0) {
2914 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2915 ror64(a->VsrD(i), 34) ^
2916 ror64(a->VsrD(i), 39);
2917 } else { /* six.bit[2*i] == 1 */
2918 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2919 ror64(a->VsrD(i), 18) ^
2920 ror64(a->VsrD(i), 41);
2921 }
2922 }
2923 }
2924 }
2925
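/*
 * vpermxor: each byte of c selects one byte of a (high nibble) and one
 * byte of b (low nibble); the result byte is their XOR.
 */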
2926 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2927 {
2928 ppc_avr_t result;
2929 int i;
2930
2931 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2932 int indexA = c->VsrB(i) >> 4;
2933 int indexB = c->VsrB(i) & 0xF;
2934
2935 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2936 }
2937 *r = result;
2938 }
2939
2940 #undef VECTOR_FOR_INORDER_I
2941
2942 /*****************************************************************************/
2943 /* SPE extension helpers */
2944 /* Nibble bit-reversal lookup table to speed up byte_reverse() and word_reverse() */
2945 static const uint8_t hbrev[16] = {
2946 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2947 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2948 };
2949
2950 static inline uint8_t byte_reverse(uint8_t val)
2951 {
2952 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2953 }
2954
2955 static inline uint32_t word_reverse(uint32_t val)
2956 {
2957 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2958 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2959 }
2960
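/*
 * brinc: bit-reversed increment. The low MASKBITS bits of arg1 are
 * bit-reversed, incremented and reversed back under the mask supplied
 * in arg2; this is the SPE addressing pattern typically used for
 * FFT-style bit-reversed indexing.
 */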
2961 #define MASKBITS 16 /* Arbitrary value - to be fixed (the mask width is implementation dependent) */
2962 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2963 {
2964 uint32_t a, b, d, mask;
2965
2966 mask = UINT32_MAX >> (32 - MASKBITS);
2967 a = arg1 & mask;
2968 b = arg2 & mask;
2969 d = word_reverse(1 + word_reverse(a | ~b));
2970 return (arg1 & ~mask) | (d & b);
2971 }
2972
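/* cntlsw32: count the leading bits that are equal to the sign bit. */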
2973 uint32_t helper_cntlsw32(uint32_t val)
2974 {
2975 if (val & 0x80000000) {
2976 return clz32(~val);
2977 } else {
2978 return clz32(val);
2979 }
2980 }
2981
2982 uint32_t helper_cntlzw32(uint32_t val)
2983 {
2984 return clz32(val);
2985 }
2986
2987 /* 440 specific */
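/*
 * dlmzb (determine leftmost zero byte): scan the bytes of high:low for
 * the first zero byte, store the resulting byte count in the low seven
 * bits of XER and, when Rc is set, record in CR0 whether the zero byte
 * was found in the high word (0b0100), the low word (0b1000) or not at
 * all (0b0010).
 */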
2988 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2989 target_ulong low, uint32_t update_Rc)
2990 {
2991 target_ulong mask;
2992 int i;
2993
2994 i = 1;
2995 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2996 if ((high & mask) == 0) {
2997 if (update_Rc) {
2998 env->crf[0] = 0x4;
2999 }
3000 goto done;
3001 }
3002 i++;
3003 }
3004 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3005 if ((low & mask) == 0) {
3006 if (update_Rc) {
3007 env->crf[0] = 0x8;
3008 }
3009 goto done;
3010 }
3011 i++;
3012 }
3013 i = 8;
3014 if (update_Rc) {
3015 env->crf[0] = 0x2;
3016 }
3017 done:
3018 env->xer = (env->xer & ~0x7F) | i;
3019 if (update_Rc) {
3020 env->crf[0] |= xer_so;
3021 }
3022 return i;
3023 }