target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
32
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
36
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 {
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
43 }
44 }
45
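/*
 * Divide Word Extended (divweu/divwe): the dividend is the low 32 bits of RA
 * followed by 32 zero bits, the divisor is the low 32 bits of RB.  If the
 * divisor is zero or the quotient cannot be represented in 32 bits, the
 * result is undefined (0 is returned here) and OV is updated when OE is set.
 */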
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
48 {
49 uint64_t rt = 0;
50 int overflow = 0;
51
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
54
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
60 }
61
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
64 }
65
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
68 }
69
70 return (target_ulong)rt;
71 }
72
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
75 {
76 int64_t rt = 0;
77 int overflow = 0;
78
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
81
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
88 }
89
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
92 }
93
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
96 }
97
98 return (target_ulong)rt;
99 }
100
101 #if defined(TARGET_PPC64)
102
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 {
105 uint64_t rt = 0;
106 int overflow = 0;
107
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
113 }
114
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
117 }
118
119 return rt;
120 }
121
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 {
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
128
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
134 }
135
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
138 }
139
140 return rt;
141 }
142
143 #endif
144
145
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
149
150 /*
151 * Subtract 1 from each byte, AND with the inverse of the original value,
152 * and check whether the MSB of each byte is set.
153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 */
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
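/*
 * e.g. hasvalue(0x1122334455667788, 0x55) is non-zero: XOR-ing with
 * pattern(0x55) turns the matching byte into 0x00, which haszero() detects.
 */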
160
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 {
163 return hasvalue(rb, ra) ? CRF_GT : 0;
164 }
165
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
169
170 /*
171 * Return a random number.
172 */
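/*
 * On failure the helpers return all ones, which per the ISA is how darn
 * signals that no random number is available.
 */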
173 uint64_t helper_darn32(void)
174 {
175 Error *err = NULL;
176 uint32_t ret;
177
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
183 }
184
185 return ret;
186 }
187
188 uint64_t helper_darn64(void)
189 {
190 Error *err = NULL;
191 uint64_t ret;
192
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
198 }
199
200 return ret;
201 }
202
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 {
205 int i;
206 uint64_t ra = 0;
207
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
213 }
214 }
215 }
216 return ra;
217 }
218
219 #endif
220
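/*
 * cmpb: each result byte is 0xFF where the corresponding bytes of rs and rb
 * are equal, else 0x00.  e.g. cmpb(0x11223344, 0x11FF3344) == 0xFF00FFFF
 * (with a 32-bit target_ulong).
 */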
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 {
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
226
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
230 }
231 mask <<= 8;
232 }
233 return ra;
234 }
235
236 /* shift right arithmetic helper */
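/*
 * e.g. shifting 0xfffffff5 (-11) right by 2 yields -3 with CA = CA32 = 1,
 * because the result is negative and non-zero bits were shifted out.
 */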
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
239 {
240 int32_t ret;
241
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
250 }
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
254 }
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
258 }
259 return (target_long)ret;
260 }
261
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
265 {
266 int64_t ret;
267
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
276 }
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
280 }
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
284 }
285 return ret;
286 }
287 #endif
288
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
291 {
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
300 }
301
302 target_ulong helper_popcntw(target_ulong val)
303 {
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
316 }
317 #else
318 target_ulong helper_popcntb(target_ulong val)
319 {
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
325 }
326 #endif
327
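/*
 * Centrifuge: bits of 'src' selected by 1-bits in 'mask' are gathered, in
 * order, into the low end of the result; bits selected by 0-bits are
 * gathered into the high end.  e.g. (low byte only, upper bits all zero):
 *   src = 0b10110110, mask = 0b11001010  ->  result = 0b11101001
 */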
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
329 {
330 /*
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we'll handle it in
333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
334 * ctz or cto (count trailing ones), we negate the mask at the end of each iteration.
335 */
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
339
340 if (mask == 0 || mask == -1) {
341 return src;
342 }
343
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
350 }
351
352 /*
353 * Extract 'n' trailing bits of src and put them in the leading 'n'
354 * bits of 'right' or 'left', pushing down the previously extracted
355 * values.
356 */
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
362 }
363
364 /*
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
369 */
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
375 }
376
377 /*
378 * At the end, 'right' has been ror'ed a total of ctpop(mask) bits. To put
379 * it back in place, we shift it right by the remaining 64 - ctpop(mask) bits.
380 */
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
385 }
386
387 return left | (right >> n);
388 }
389
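/*
 * Parallel bit deposit: the low-order bits of 'src' are scattered, in order,
 * to the bit positions where 'mask' is set.
 * e.g. PDEPD(src = 0b1011, mask = 0xF0) == 0xB0
 */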
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 {
392 int i, o;
393 uint64_t result = 0;
394
395 if (mask == -1) {
396 return src;
397 }
398
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
403 }
404
405 return result;
406 }
407
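/*
 * Parallel bit extract: the bits of 'src' at positions where 'mask' is set
 * are gathered, in order, into the low-order bits of the result.
 * e.g. PEXTD(src = 0xB0, mask = 0xF0) == 0xB
 */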
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 {
410 int i, o;
411 uint64_t result = 0;
412
413 if (mask == -1) {
414 return src;
415 }
416
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
421 }
422
423 return result;
424 }
425
426 /*****************************************************************************/
427 /* Altivec extension helpers */
428 #if defined(HOST_WORDS_BIGENDIAN)
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
435
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
439 { \
440 to_type r; \
441 \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
450 } \
451 return r; \
452 }
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
455 { \
456 to_type r; \
457 \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
463 } \
464 return r; \
465 }
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
478
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 {
481 ppc_store_vscr(env, vscr);
482 }
483
484 uint32_t helper_mfvscr(CPUPPCState *env)
485 {
486 return ppc_get_vscr(env);
487 }
488
489 static inline void set_vscr_sat(CPUPPCState *env)
490 {
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
493 }
494
495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 {
497 int i;
498
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
501 }
502 }
503
504 /* vprtybw */
505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 {
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
512 }
513 }
514
515 /* vprtybd */
516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 {
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
524 }
525 }
526
527 /* vprtybq */
528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 {
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
536 }
537
538 #define VARITHFP(suffix, func) \
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
541 { \
542 int i; \
543 \
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
546 } \
547 }
548 VARITHFP(addfp, float32_add)
549 VARITHFP(subfp, float32_sub)
550 VARITHFP(minfp, float32_min)
551 VARITHFP(maxfp, float32_max)
552 #undef VARITHFP
553
554 #define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
557 { \
558 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
562 } \
563 }
564 VARITHFPFMA(maddfp, 0);
565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566 #undef VARITHFPFMA
567
568 #define VARITHSAT_CASE(type, op, cvt, element) \
569 { \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
572 }
573
574 #define VARITHSAT_DO(name, op, optype, cvt, element) \
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 { \
578 int sat = 0; \
579 int i; \
580 \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
582 VARITHSAT_CASE(optype, op, cvt, element); \
583 } \
584 if (sat) { \
585 vscr_sat->u32[0] = 1; \
586 } \
587 }
588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600 #undef VARITHSAT_CASE
601 #undef VARITHSAT_DO
602 #undef VARITHSAT_SIGNED
603 #undef VARITHSAT_UNSIGNED
604
605 #define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 { \
608 int i; \
609 \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
613 } \
614 }
615
616 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620 VAVG(b, s8, int16_t, u8, uint16_t)
621 VAVG(h, s16, int32_t, u16, uint32_t)
622 VAVG(w, s32, int64_t, u32, uint64_t)
623 #undef VAVG_DO
624 #undef VAVG
625
626 #define VABSDU_DO(name, element) \
627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 int i; \
630 \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
635 } \
636 }
637
638 /*
639 * VABSDU - Vector absolute difference unsigned
640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
642 */
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645 VABSDU(b, u8)
646 VABSDU(h, u16)
647 VABSDU(w, u32)
648 #undef VABSDU_DO
649 #undef VABSDU
650
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
654 { \
655 int i; \
656 \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
660 } \
661 }
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
664 #undef VCF
665
666 #define VCMPNEZ(NAME, ELEM) \
667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 { \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
672 } \
673 }
674 VCMPNEZ(VCMPNEZB, u8)
675 VCMPNEZ(VCMPNEZH, u16)
676 VCMPNEZ(VCMPNEZW, u32)
677 #undef VCMPNEZ
678
679 #define VCMPFP_DO(suffix, compare, order, record) \
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
682 { \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
687 \
688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
689 uint32_t result; \
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
699 } \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
703 } \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
706 } \
707 }
708 #define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
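/*
 * Note that "ge" is implemented as "ordered and not less than"; an unordered
 * comparison (NaN operand) yields 0 in the corresponding lane for all three.
 */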
711 VCMPFP(eqfp, ==, float_relation_equal)
712 VCMPFP(gefp, !=, float_relation_less)
713 VCMPFP(gtfp, ==, float_relation_greater)
714 #undef VCMPFP_DO
715 #undef VCMPFP
716
717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
719 {
720 int i;
721 int all_in = 0;
722
723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 all_in = 1;
729 } else {
730 float32 bneg = float32_chs(b->f32[i]);
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
735
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
738 }
739 }
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
742 }
743 }
744
745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 0);
748 }
749
750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
751 ppc_avr_t *b)
752 {
753 vcmpbfp_internal(env, r, a, b, 1);
754 }
755
756 #define VCT(suffix, satcvt, element) \
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
759 { \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
763 \
764 set_float_rounding_mode(float_round_to_zero, &s); \
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
767 r->element[i] = 0; \
768 } else { \
769 float64 t = float32_to_float64(b->f32[i], &s); \
770 int64_t j; \
771 \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
775 } \
776 } \
777 if (sat) { \
778 set_vscr_sat(env); \
779 } \
780 }
781 VCT(uxs, cvtsduw, u32)
782 VCT(sxs, cvtsdsw, s32)
783 #undef VCT
784
785 target_ulong helper_vclzlsbb(ppc_avr_t *r)
786 {
787 target_ulong count = 0;
788 int i;
789 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
790 if (r->VsrB(i) & 0x01) {
791 break;
792 }
793 count++;
794 }
795 return count;
796 }
797
798 target_ulong helper_vctzlsbb(ppc_avr_t *r)
799 {
800 target_ulong count = 0;
801 int i;
802 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
803 if (r->VsrB(i) & 0x01) {
804 break;
805 }
806 count++;
807 }
808 return count;
809 }
810
811 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
812 ppc_avr_t *b, ppc_avr_t *c)
813 {
814 int sat = 0;
815 int i;
816
817 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
818 int32_t prod = a->s16[i] * b->s16[i];
819 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
820
821 r->s16[i] = cvtswsh(t, &sat);
822 }
823
824 if (sat) {
825 set_vscr_sat(env);
826 }
827 }
828
829 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
830 ppc_avr_t *b, ppc_avr_t *c)
831 {
832 int sat = 0;
833 int i;
834
835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
836 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
837 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
838 r->s16[i] = cvtswsh(t, &sat);
839 }
840
841 if (sat) {
842 set_vscr_sat(env);
843 }
844 }
845
846 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
847 {
848 int i;
849
850 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
851 int32_t prod = a->s16[i] * b->s16[i];
852 r->s16[i] = (int16_t) (prod + c->s16[i]);
853 }
854 }
855
856 #define VMRG_DO(name, element, access, ofs) \
857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
858 { \
859 ppc_avr_t result; \
860 int i, half = ARRAY_SIZE(r->element) / 2; \
861 \
862 for (i = 0; i < half; i++) { \
863 result.access(i * 2 + 0) = a->access(i + ofs); \
864 result.access(i * 2 + 1) = b->access(i + ofs); \
865 } \
866 *r = result; \
867 }
868
869 #define VMRG(suffix, element, access) \
870 VMRG_DO(mrgl##suffix, element, access, half) \
871 VMRG_DO(mrgh##suffix, element, access, 0)
872 VMRG(b, u8, VsrB)
873 VMRG(h, u16, VsrH)
874 VMRG(w, u32, VsrW)
875 #undef VMRG_DO
876 #undef VMRG
877
878 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b, ppc_avr_t *c)
880 {
881 int32_t prod[16];
882 int i;
883
884 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
885 prod[i] = (int32_t)a->s8[i] * b->u8[i];
886 }
887
888 VECTOR_FOR_INORDER_I(i, s32) {
889 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
890 prod[4 * i + 2] + prod[4 * i + 3];
891 }
892 }
893
894 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
896 {
897 int32_t prod[8];
898 int i;
899
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 prod[i] = a->s16[i] * b->s16[i];
902 }
903
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
906 }
907 }
908
909 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
911 {
912 int32_t prod[8];
913 int i;
914 int sat = 0;
915
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = (int32_t)a->s16[i] * b->s16[i];
918 }
919
920 VECTOR_FOR_INORDER_I(i, s32) {
921 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922
923 r->u32[i] = cvtsdsw(t, &sat);
924 }
925
926 if (sat) {
927 set_vscr_sat(env);
928 }
929 }
930
931 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
932 ppc_avr_t *b, ppc_avr_t *c)
933 {
934 uint16_t prod[16];
935 int i;
936
937 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
938 prod[i] = a->u8[i] * b->u8[i];
939 }
940
941 VECTOR_FOR_INORDER_I(i, u32) {
942 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
943 prod[4 * i + 2] + prod[4 * i + 3];
944 }
945 }
946
947 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
949 {
950 uint32_t prod[8];
951 int i;
952
953 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
954 prod[i] = a->u16[i] * b->u16[i];
955 }
956
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
959 }
960 }
961
962 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
963 ppc_avr_t *b, ppc_avr_t *c)
964 {
965 uint32_t prod[8];
966 int i;
967 int sat = 0;
968
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
971 }
972
973 VECTOR_FOR_INORDER_I(i, s32) {
974 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975
976 r->u32[i] = cvtuduw(t, &sat);
977 }
978
979 if (sat) {
980 set_vscr_sat(env);
981 }
982 }
983
984 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
985 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 { \
987 int i; \
988 \
989 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
990 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
991 (cast)b->mul_access(i); \
992 } \
993 }
994
995 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
996 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
997 { \
998 int i; \
999 \
1000 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1001 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1002 (cast)b->mul_access(i + 1); \
1003 } \
1004 }
1005
1006 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1008 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1009 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1010 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1011 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1012 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1013 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1014 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1015 #undef VMUL_DO_EVN
1016 #undef VMUL_DO_ODD
1017 #undef VMUL
1018
1019 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1020 target_ulong uim)
1021 {
1022 int i, idx;
1023 ppc_vsr_t tmp = { .u64 = {0, 0} };
1024
1025 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1026 if ((pcv->VsrB(i) >> 5) == uim) {
1027 idx = pcv->VsrB(i) & 0x1f;
1028 if (idx < ARRAY_SIZE(t->u8)) {
1029 tmp.VsrB(i) = s0->VsrB(idx);
1030 } else {
1031 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1032 }
1033 }
1034 }
1035
1036 *t = tmp;
1037 }
1038
1039 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1040 {
1041 ppc_avr_t result;
1042 int i;
1043
1044 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1045 int s = c->VsrB(i) & 0x1f;
1046 int index = s & 0xf;
1047
1048 if (s & 0x10) {
1049 result.VsrB(i) = b->VsrB(index);
1050 } else {
1051 result.VsrB(i) = a->VsrB(index);
1052 }
1053 }
1054 *r = result;
1055 }
1056
1057 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1058 {
1059 ppc_avr_t result;
1060 int i;
1061
1062 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1063 int s = c->VsrB(i) & 0x1f;
1064 int index = 15 - (s & 0xf);
1065
1066 if (s & 0x10) {
1067 result.VsrB(i) = a->VsrB(index);
1068 } else {
1069 result.VsrB(i) = b->VsrB(index);
1070 }
1071 }
1072 *r = result;
1073 }
1074
1075 #define XXGENPCV(NAME, SZ) \
1076 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1077 { \
1078 ppc_vsr_t tmp; \
1079 \
1080 /* Initialize tmp with the result of an all-zeros mask */ \
1081 tmp.VsrD(0) = 0x1011121314151617; \
1082 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1083 \
1084 /* Iterate over the most significant byte of each element */ \
1085 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1086 if (b->VsrB(i) & 0x80) { \
1087 /* Update each byte of the element */ \
1088 for (int k = 0; k < SZ; k++) { \
1089 tmp.VsrB(i + k) = j + k; \
1090 } \
1091 j += SZ; \
1092 } \
1093 } \
1094 \
1095 *t = tmp; \
1096 } \
1097 \
1098 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1099 { \
1100 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1101 \
1102 /* Iterate over the most significant byte of each element */ \
1103 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1104 if (b->VsrB(i) & 0x80) { \
1105 /* Update each byte of the element */ \
1106 for (int k = 0; k < SZ; k++) { \
1107 tmp.VsrB(j + k) = i + k; \
1108 } \
1109 j += SZ; \
1110 } \
1111 } \
1112 \
1113 *t = tmp; \
1114 } \
1115 \
1116 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1117 { \
1118 ppc_vsr_t tmp; \
1119 \
1120 /* Initialize tmp with the result of an all-zeros mask */ \
1121 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1122 tmp.VsrD(1) = 0x1716151413121110; \
1123 \
1124 /* Iterate over the most significant byte of each element */ \
1125 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1126 /* Reverse indexing of "i" */ \
1127 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1128 if (b->VsrB(idx) & 0x80) { \
1129 /* Update each byte of the element */ \
1130 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1131 tmp.VsrB(idx + rk) = j + k; \
1132 } \
1133 j += SZ; \
1134 } \
1135 } \
1136 \
1137 *t = tmp; \
1138 } \
1139 \
1140 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1141 { \
1142 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1143 \
1144 /* Iterate over the most significant byte of each element */ \
1145 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1146 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1147 /* Update each byte of the element */ \
1148 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1149 /* Reverse indexing of "j" */ \
1150 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1151 tmp.VsrB(idx + rk) = i + k; \
1152 } \
1153 j += SZ; \
1154 } \
1155 } \
1156 \
1157 *t = tmp; \
1158 }
1159
1160 XXGENPCV(XXGENPCVBM, 1)
1161 XXGENPCV(XXGENPCVHM, 2)
1162 XXGENPCV(XXGENPCVWM, 4)
1163 XXGENPCV(XXGENPCVDM, 8)
1164 #undef XXGENPCV
1165
1166 #if defined(HOST_WORDS_BIGENDIAN)
1167 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1168 #define VBPERMD_INDEX(i) (i)
1169 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1170 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1171 #else
1172 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1173 #define VBPERMD_INDEX(i) (1 - i)
1174 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1175 #define EXTRACT_BIT(avr, i, index) \
1176 (extract64((avr)->u64[1 - i], 63 - index, 1))
1177 #endif
1178
1179 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1180 {
1181 int i, j;
1182 ppc_avr_t result = { .u64 = { 0, 0 } };
1183 VECTOR_FOR_INORDER_I(i, u64) {
1184 for (j = 0; j < 8; j++) {
1185 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1186 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1187 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1188 }
1189 }
1190 }
1191 *r = result;
1192 }
1193
1194 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1195 {
1196 int i;
1197 uint64_t perm = 0;
1198
1199 VECTOR_FOR_INORDER_I(i, u8) {
1200 int index = VBPERMQ_INDEX(b, i);
1201
1202 if (index < 128) {
1203 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1204 if (a->u64[VBPERMQ_DW(index)] & mask) {
1205 perm |= (0x8000 >> i);
1206 }
1207 }
1208 }
1209
1210 r->VsrD(0) = perm;
1211 r->VsrD(1) = 0;
1212 }
1213
1214 #undef VBPERMQ_INDEX
1215 #undef VBPERMQ_DW
1216
1217 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1218 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1219 { \
1220 int i, j; \
1221 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1222 \
1223 VECTOR_FOR_INORDER_I(i, srcfld) { \
1224 prod[i] = 0; \
1225 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1226 if (a->srcfld[i] & (1ull << j)) { \
1227 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1228 } \
1229 } \
1230 } \
1231 \
1232 VECTOR_FOR_INORDER_I(i, trgfld) { \
1233 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1234 } \
1235 }
1236
1237 PMSUM(vpmsumb, u8, u16, uint16_t)
1238 PMSUM(vpmsumh, u16, u32, uint32_t)
1239 PMSUM(vpmsumw, u32, u64, uint64_t)
1240
1241 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1242 {
1243
1244 #ifdef CONFIG_INT128
1245 int i, j;
1246 __uint128_t prod[2];
1247
1248 VECTOR_FOR_INORDER_I(i, u64) {
1249 prod[i] = 0;
1250 for (j = 0; j < 64; j++) {
1251 if (a->u64[i] & (1ull << j)) {
1252 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1253 }
1254 }
1255 }
1256
1257 r->u128 = prod[0] ^ prod[1];
1258
1259 #else
1260 int i, j;
1261 ppc_avr_t prod[2];
1262
1263 VECTOR_FOR_INORDER_I(i, u64) {
1264 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1265 for (j = 0; j < 64; j++) {
1266 if (a->u64[i] & (1ull << j)) {
1267 ppc_avr_t bshift;
1268 if (j == 0) {
1269 bshift.VsrD(0) = 0;
1270 bshift.VsrD(1) = b->u64[i];
1271 } else {
1272 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1273 bshift.VsrD(1) = b->u64[i] << j;
1274 }
1275 prod[i].VsrD(1) ^= bshift.VsrD(1);
1276 prod[i].VsrD(0) ^= bshift.VsrD(0);
1277 }
1278 }
1279 }
1280
1281 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1282 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1283 #endif
1284 }
1285
1286
1287 #if defined(HOST_WORDS_BIGENDIAN)
1288 #define PKBIG 1
1289 #else
1290 #define PKBIG 0
1291 #endif
1292 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1293 {
1294 int i, j;
1295 ppc_avr_t result;
1296 #if defined(HOST_WORDS_BIGENDIAN)
1297 const ppc_avr_t *x[2] = { a, b };
1298 #else
1299 const ppc_avr_t *x[2] = { b, a };
1300 #endif
1301
1302 VECTOR_FOR_INORDER_I(i, u64) {
1303 VECTOR_FOR_INORDER_I(j, u32) {
1304 uint32_t e = x[i]->u32[j];
1305
1306 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1307 ((e >> 6) & 0x3e0) |
1308 ((e >> 3) & 0x1f));
1309 }
1310 }
1311 *r = result;
1312 }
1313
1314 #define VPK(suffix, from, to, cvt, dosat) \
1315 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1316 ppc_avr_t *a, ppc_avr_t *b) \
1317 { \
1318 int i; \
1319 int sat = 0; \
1320 ppc_avr_t result; \
1321 ppc_avr_t *a0 = PKBIG ? a : b; \
1322 ppc_avr_t *a1 = PKBIG ? b : a; \
1323 \
1324 VECTOR_FOR_INORDER_I(i, from) { \
1325 result.to[i] = cvt(a0->from[i], &sat); \
1326 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1327 } \
1328 *r = result; \
1329 if (dosat && sat) { \
1330 set_vscr_sat(env); \
1331 } \
1332 }
1333 #define I(x, y) (x)
1334 VPK(shss, s16, s8, cvtshsb, 1)
1335 VPK(shus, s16, u8, cvtshub, 1)
1336 VPK(swss, s32, s16, cvtswsh, 1)
1337 VPK(swus, s32, u16, cvtswuh, 1)
1338 VPK(sdss, s64, s32, cvtsdsw, 1)
1339 VPK(sdus, s64, u32, cvtsduw, 1)
1340 VPK(uhus, u16, u8, cvtuhub, 1)
1341 VPK(uwus, u32, u16, cvtuwuh, 1)
1342 VPK(udus, u64, u32, cvtuduw, 1)
1343 VPK(uhum, u16, u8, I, 0)
1344 VPK(uwum, u32, u16, I, 0)
1345 VPK(udum, u64, u32, I, 0)
1346 #undef I
1347 #undef VPK
1348 #undef PKBIG
1349
1350 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1351 {
1352 int i;
1353
1354 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1355 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1356 }
1357 }
1358
1359 #define VRFI(suffix, rounding) \
1360 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1361 ppc_avr_t *b) \
1362 { \
1363 int i; \
1364 float_status s = env->vec_status; \
1365 \
1366 set_float_rounding_mode(rounding, &s); \
1367 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1368 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1369 } \
1370 }
1371 VRFI(n, float_round_nearest_even)
1372 VRFI(m, float_round_down)
1373 VRFI(p, float_round_up)
1374 VRFI(z, float_round_to_zero)
1375 #undef VRFI
1376
1377 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1378 {
1379 int i;
1380
1381 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1382 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1383
1384 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1385 }
1386 }
1387
1388 #define VRLMI(name, size, element, insert) \
1389 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1390 { \
1391 int i; \
1392 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1393 uint##size##_t src1 = a->element[i]; \
1394 uint##size##_t src2 = b->element[i]; \
1395 uint##size##_t src3 = r->element[i]; \
1396 uint##size##_t begin, end, shift, mask, rot_val; \
1397 \
1398 shift = extract##size(src2, 0, 6); \
1399 end = extract##size(src2, 8, 6); \
1400 begin = extract##size(src2, 16, 6); \
1401 rot_val = rol##size(src1, shift); \
1402 mask = mask_u##size(begin, end); \
1403 if (insert) { \
1404 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1405 } else { \
1406 r->element[i] = (rot_val & mask); \
1407 } \
1408 } \
1409 }
1410
1411 VRLMI(VRLDMI, 64, u64, 1);
1412 VRLMI(VRLWMI, 32, u32, 1);
1413 VRLMI(VRLDNM, 64, u64, 0);
1414 VRLMI(VRLWNM, 32, u32, 0);
1415
1416 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1417 {
1418 int i;
1419
1420 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1421 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1422 }
1423 }
1424
1425 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1426 {
1427 int i;
1428
1429 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1430 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1431 }
1432 }
1433
1434 #define VEXTU_X_DO(name, size, left) \
1435 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1436 { \
1437 int index = (a & 0xf) * 8; \
1438 if (left) { \
1439 index = 128 - index - size; \
1440 } \
1441 return int128_getlo(int128_rshift(b->s128, index)) & \
1442 MAKE_64BIT_MASK(0, size); \
1443 }
1444 VEXTU_X_DO(vextublx, 8, 1)
1445 VEXTU_X_DO(vextuhlx, 16, 1)
1446 VEXTU_X_DO(vextuwlx, 32, 1)
1447 VEXTU_X_DO(vextubrx, 8, 0)
1448 VEXTU_X_DO(vextuhrx, 16, 0)
1449 VEXTU_X_DO(vextuwrx, 32, 0)
1450 #undef VEXTU_X_DO
1451
1452 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1453 {
1454 int i;
1455 unsigned int shift, bytes, size;
1456
1457 size = ARRAY_SIZE(r->u8);
1458 for (i = 0; i < size; i++) {
1459 shift = b->VsrB(i) & 0x7; /* extract shift value */
1460 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1461 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1462 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1463 }
1464 }
1465
1466 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1467 {
1468 int i;
1469 unsigned int shift, bytes;
1470
1471 /*
1472 * Process in reverse order, as the destination and source registers can
1473 * be the same: the register is modified in place (saving a temporary),
1474 * and reverse order guarantees that a computed byte is not fed back in.
1475 */
1476 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1477 shift = b->VsrB(i) & 0x7; /* extract shift value */
1478 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1479 /* extract adjacent bytes */
1480 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1481 }
1482 }
1483
1484 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1485 {
1486 int sh = shift & 0xf;
1487 int i;
1488 ppc_avr_t result;
1489
1490 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1491 int index = sh + i;
1492 if (index > 0xf) {
1493 result.VsrB(i) = b->VsrB(index - 0x10);
1494 } else {
1495 result.VsrB(i) = a->VsrB(index);
1496 }
1497 }
1498 *r = result;
1499 }
1500
1501 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1502 {
1503 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1504
1505 #if defined(HOST_WORDS_BIGENDIAN)
1506 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1507 memset(&r->u8[16 - sh], 0, sh);
1508 #else
1509 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1510 memset(&r->u8[0], 0, sh);
1511 #endif
1512 }
1513
1514 #if defined(HOST_WORDS_BIGENDIAN)
1515 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1516 #else
1517 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1518 #endif
1519
1520 #define VINSX(SUFFIX, TYPE) \
1521 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1522 uint64_t val, target_ulong index) \
1523 { \
1524 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1525 target_long idx = index; \
1526 \
1527 if (idx < 0 || idx > maxidx) { \
1528 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1529 qemu_log_mask(LOG_GUEST_ERROR, \
1530 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1531 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1532 } else { \
1533 TYPE src = val; \
1534 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1535 } \
1536 }
1537 VINSX(B, uint8_t)
1538 VINSX(H, uint16_t)
1539 VINSX(W, uint32_t)
1540 VINSX(D, uint64_t)
1541 #undef ELEM_ADDR
1542 #undef VINSX
1543 #if defined(HOST_WORDS_BIGENDIAN)
1544 #define VEXTDVLX(NAME, SIZE) \
1545 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1546 target_ulong index) \
1547 { \
1548 const target_long idx = index; \
1549 ppc_avr_t tmp[2] = { *a, *b }; \
1550 memset(t, 0, sizeof(*t)); \
1551 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1552 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1553 } else { \
1554 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1555 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1556 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1557 } \
1558 }
1559 #else
1560 #define VEXTDVLX(NAME, SIZE) \
1561 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1562 target_ulong index) \
1563 { \
1564 const target_long idx = index; \
1565 ppc_avr_t tmp[2] = { *b, *a }; \
1566 memset(t, 0, sizeof(*t)); \
1567 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1568 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1569 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1570 } else { \
1571 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1572 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1573 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1574 } \
1575 }
1576 #endif
1577 VEXTDVLX(VEXTDUBVLX, 1)
1578 VEXTDVLX(VEXTDUHVLX, 2)
1579 VEXTDVLX(VEXTDUWVLX, 4)
1580 VEXTDVLX(VEXTDDVLX, 8)
1581 #undef VEXTDVLX
1582 #if defined(HOST_WORDS_BIGENDIAN)
1583 #define VEXTRACT(suffix, element) \
1584 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1585 { \
1586 uint32_t es = sizeof(r->element[0]); \
1587 memmove(&r->u8[8 - es], &b->u8[index], es); \
1588 memset(&r->u8[8], 0, 8); \
1589 memset(&r->u8[0], 0, 8 - es); \
1590 }
1591 #else
1592 #define VEXTRACT(suffix, element) \
1593 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1594 { \
1595 uint32_t es = sizeof(r->element[0]); \
1596 uint32_t s = (16 - index) - es; \
1597 memmove(&r->u8[8], &b->u8[s], es); \
1598 memset(&r->u8[0], 0, 8); \
1599 memset(&r->u8[8 + es], 0, 8 - es); \
1600 }
1601 #endif
1602 VEXTRACT(ub, u8)
1603 VEXTRACT(uh, u16)
1604 VEXTRACT(uw, u32)
1605 VEXTRACT(d, u64)
1606 #undef VEXTRACT
1607
1608 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1609 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1610 { \
1611 int i, idx, crf = 0; \
1612 \
1613 for (i = 0; i < NUM_ELEMS; i++) { \
1614 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1615 if (b->Vsr##ELEM(idx)) { \
1616 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1617 } else { \
1618 crf = 0b0010; \
1619 break; \
1620 } \
1621 } \
1622 \
1623 for (; i < NUM_ELEMS; i++) { \
1624 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1625 t->Vsr##ELEM(idx) = 0; \
1626 } \
1627 \
1628 return crf; \
1629 }
1630 VSTRI(VSTRIBL, B, 16, true)
1631 VSTRI(VSTRIBR, B, 16, false)
1632 VSTRI(VSTRIHL, H, 8, true)
1633 VSTRI(VSTRIHR, H, 8, false)
1634 #undef VSTRI
1635
1636 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1637 ppc_vsr_t *xb, uint32_t index)
1638 {
1639 ppc_vsr_t t = { };
1640 size_t es = sizeof(uint32_t);
1641 uint32_t ext_index;
1642 int i;
1643
1644 ext_index = index;
1645 for (i = 0; i < es; i++, ext_index++) {
1646 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1647 }
1648
1649 *xt = t;
1650 }
1651
1652 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1653 ppc_vsr_t *xb, uint32_t index)
1654 {
1655 ppc_vsr_t t = *xt;
1656 size_t es = sizeof(uint32_t);
1657 int ins_index, i = 0;
1658
1659 ins_index = index;
1660 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1661 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1662 }
1663
1664 *xt = t;
1665 }
1666
1667 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1668 uint32_t desc)
1669 {
1670 /*
1671 * Instead of processing imm bit-by-bit, we'll skip the computation of
1672 * conjunctions whose corresponding bit is unset.
1673 */
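/*
 * e.g. imm = 0x01 selects only the A & B & C conjunction (bit = 7), and
 * imm = 0x80 selects only ~A & ~B & ~C (bit = 0).
 */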
1674 int bit, imm = simd_data(desc);
1675 Int128 conj, disj = int128_zero();
1676
1677 /* Iterate over set bits from the least to the most significant bit */
1678 while (imm) {
1679 /*
1680 * Get the next bit to be processed with ctz64. Invert the result of
1681 * ctz64 to match the indexing used by PowerISA.
1682 */
1683 bit = 7 - ctzl(imm);
1684 if (bit & 0x4) {
1685 conj = a->s128;
1686 } else {
1687 conj = int128_not(a->s128);
1688 }
1689 if (bit & 0x2) {
1690 conj = int128_and(conj, b->s128);
1691 } else {
1692 conj = int128_and(conj, int128_not(b->s128));
1693 }
1694 if (bit & 0x1) {
1695 conj = int128_and(conj, c->s128);
1696 } else {
1697 conj = int128_and(conj, int128_not(c->s128));
1698 }
1699 disj = int128_or(disj, conj);
1700
1701 /* Unset the least significant bit that is set */
1702 imm &= imm - 1;
1703 }
1704
1705 t->s128 = disj;
1706 }
1707
1708 #define XXBLEND(name, sz) \
1709 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1710 ppc_avr_t *c, uint32_t desc) \
1711 { \
1712 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1713 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1714 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1715 } \
1716 }
1717 XXBLEND(B, 8)
1718 XXBLEND(H, 16)
1719 XXBLEND(W, 32)
1720 XXBLEND(D, 64)
1721 #undef XXBLEND
1722
1723 #define VNEG(name, element) \
1724 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1725 { \
1726 int i; \
1727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1728 r->element[i] = -b->element[i]; \
1729 } \
1730 }
1731 VNEG(vnegw, s32)
1732 VNEG(vnegd, s64)
1733 #undef VNEG
1734
1735 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1736 {
1737 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1738
1739 #if defined(HOST_WORDS_BIGENDIAN)
1740 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1741 memset(&r->u8[0], 0, sh);
1742 #else
1743 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1744 memset(&r->u8[16 - sh], 0, sh);
1745 #endif
1746 }
1747
1748 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1749 {
1750 int i;
1751
1752 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1753 r->u32[i] = a->u32[i] >= b->u32[i];
1754 }
1755 }
1756
1757 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1758 {
1759 int64_t t;
1760 int i, upper;
1761 ppc_avr_t result;
1762 int sat = 0;
1763
1764 upper = ARRAY_SIZE(r->s32) - 1;
1765 t = (int64_t)b->VsrSW(upper);
1766 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1767 t += a->VsrSW(i);
1768 result.VsrSW(i) = 0;
1769 }
1770 result.VsrSW(upper) = cvtsdsw(t, &sat);
1771 *r = result;
1772
1773 if (sat) {
1774 set_vscr_sat(env);
1775 }
1776 }
1777
1778 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1779 {
1780 int i, j, upper;
1781 ppc_avr_t result;
1782 int sat = 0;
1783
1784 upper = 1;
1785 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1786 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1787
1788 result.VsrD(i) = 0;
1789 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1790 t += a->VsrSW(2 * i + j);
1791 }
1792 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1793 }
1794
1795 *r = result;
1796 if (sat) {
1797 set_vscr_sat(env);
1798 }
1799 }
1800
1801 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1802 {
1803 int i, j;
1804 int sat = 0;
1805
1806 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1807 int64_t t = (int64_t)b->s32[i];
1808
1809 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1810 t += a->s8[4 * i + j];
1811 }
1812 r->s32[i] = cvtsdsw(t, &sat);
1813 }
1814
1815 if (sat) {
1816 set_vscr_sat(env);
1817 }
1818 }
1819
1820 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1821 {
1822 int sat = 0;
1823 int i;
1824
1825 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1826 int64_t t = (int64_t)b->s32[i];
1827
1828 t += a->s16[2 * i] + a->s16[2 * i + 1];
1829 r->s32[i] = cvtsdsw(t, &sat);
1830 }
1831
1832 if (sat) {
1833 set_vscr_sat(env);
1834 }
1835 }
1836
1837 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1838 {
1839 int i, j;
1840 int sat = 0;
1841
1842 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1843 uint64_t t = (uint64_t)b->u32[i];
1844
1845 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1846 t += a->u8[4 * i + j];
1847 }
1848 r->u32[i] = cvtuduw(t, &sat);
1849 }
1850
1851 if (sat) {
1852 set_vscr_sat(env);
1853 }
1854 }
1855
1856 #if defined(HOST_WORDS_BIGENDIAN)
1857 #define UPKHI 1
1858 #define UPKLO 0
1859 #else
1860 #define UPKHI 0
1861 #define UPKLO 1
1862 #endif
1863 #define VUPKPX(suffix, hi) \
1864 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1865 { \
1866 int i; \
1867 ppc_avr_t result; \
1868 \
1869 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1870 uint16_t e = b->u16[hi ? i : i + 4]; \
1871 uint8_t a = (e >> 15) ? 0xff : 0; \
1872 uint8_t r = (e >> 10) & 0x1f; \
1873 uint8_t g = (e >> 5) & 0x1f; \
1874 uint8_t b = e & 0x1f; \
1875 \
1876 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1877 } \
1878 *r = result; \
1879 }
1880 VUPKPX(lpx, UPKLO)
1881 VUPKPX(hpx, UPKHI)
1882 #undef VUPKPX
1883
1884 #define VUPK(suffix, unpacked, packee, hi) \
1885 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1886 { \
1887 int i; \
1888 ppc_avr_t result; \
1889 \
1890 if (hi) { \
1891 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1892 result.unpacked[i] = b->packee[i]; \
1893 } \
1894 } else { \
1895 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1896 i++) { \
1897 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1898 } \
1899 } \
1900 *r = result; \
1901 }
1902 VUPK(hsb, s16, s8, UPKHI)
1903 VUPK(hsh, s32, s16, UPKHI)
1904 VUPK(hsw, s64, s32, UPKHI)
1905 VUPK(lsb, s16, s8, UPKLO)
1906 VUPK(lsh, s32, s16, UPKLO)
1907 VUPK(lsw, s64, s32, UPKLO)
1908 #undef VUPK
1909 #undef UPKHI
1910 #undef UPKLO
1911
1912 #define VGENERIC_DO(name, element) \
1913 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1914 { \
1915 int i; \
1916 \
1917 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1918 r->element[i] = name(b->element[i]); \
1919 } \
1920 }
1921
1922 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1923 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1924
1925 VGENERIC_DO(clzb, u8)
1926 VGENERIC_DO(clzh, u16)
1927
1928 #undef clzb
1929 #undef clzh
1930
1931 #define ctzb(v) ((v) ? ctz32(v) : 8)
1932 #define ctzh(v) ((v) ? ctz32(v) : 16)
1933 #define ctzw(v) ctz32((v))
1934 #define ctzd(v) ctz64((v))
1935
1936 VGENERIC_DO(ctzb, u8)
1937 VGENERIC_DO(ctzh, u16)
1938 VGENERIC_DO(ctzw, u32)
1939 VGENERIC_DO(ctzd, u64)
1940
1941 #undef ctzb
1942 #undef ctzh
1943 #undef ctzw
1944 #undef ctzd
1945
1946 #define popcntb(v) ctpop8(v)
1947 #define popcnth(v) ctpop16(v)
1948 #define popcntw(v) ctpop32(v)
1949 #define popcntd(v) ctpop64(v)
1950
1951 VGENERIC_DO(popcntb, u8)
1952 VGENERIC_DO(popcnth, u16)
1953 VGENERIC_DO(popcntw, u32)
1954 VGENERIC_DO(popcntd, u64)
1955
1956 #undef popcntb
1957 #undef popcnth
1958 #undef popcntw
1959 #undef popcntd
1960
1961 #undef VGENERIC_DO
1962
1963 #if defined(HOST_WORDS_BIGENDIAN)
1964 #define QW_ONE { .u64 = { 0, 1 } }
1965 #else
1966 #define QW_ONE { .u64 = { 1, 0 } }
1967 #endif
1968
1969 #ifndef CONFIG_INT128
1970
1971 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1972 {
1973 t->u64[0] = ~a.u64[0];
1974 t->u64[1] = ~a.u64[1];
1975 }
1976
1977 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1978 {
1979 if (a.VsrD(0) < b.VsrD(0)) {
1980 return -1;
1981 } else if (a.VsrD(0) > b.VsrD(0)) {
1982 return 1;
1983 } else if (a.VsrD(1) < b.VsrD(1)) {
1984 return -1;
1985 } else if (a.VsrD(1) > b.VsrD(1)) {
1986 return 1;
1987 } else {
1988 return 0;
1989 }
1990 }
1991
1992 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1993 {
1994 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1995 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1996 (~a.VsrD(1) < b.VsrD(1));
1997 }
1998
1999 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2000 {
2001 ppc_avr_t not_a;
2002 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2003 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2004 (~a.VsrD(1) < b.VsrD(1));
2005 avr_qw_not(&not_a, a);
2006 return avr_qw_cmpu(not_a, b) < 0;
2007 }
2008
2009 #endif
2010
2011 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2012 {
2013 #ifdef CONFIG_INT128
2014 r->u128 = a->u128 + b->u128;
2015 #else
2016 avr_qw_add(r, *a, *b);
2017 #endif
2018 }
2019
2020 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2021 {
2022 #ifdef CONFIG_INT128
2023 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2024 #else
2025
2026 if (c->VsrD(1) & 1) {
2027 ppc_avr_t tmp;
2028
2029 tmp.VsrD(0) = 0;
2030 tmp.VsrD(1) = c->VsrD(1) & 1;
2031 avr_qw_add(&tmp, *a, tmp);
2032 avr_qw_add(r, tmp, *b);
2033 } else {
2034 avr_qw_add(r, *a, *b);
2035 }
2036 #endif
2037 }
2038
2039 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2040 {
2041 #ifdef CONFIG_INT128
2042 r->u128 = (~a->u128 < b->u128);
2043 #else
2044 ppc_avr_t not_a;
2045
2046 avr_qw_not(&not_a, *a);
2047
2048 r->VsrD(0) = 0;
2049 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2050 #endif
2051 }
2052
2053 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2054 {
2055 #ifdef CONFIG_INT128
2056 int carry_out = (~a->u128 < b->u128);
2057 if (!carry_out && (c->u128 & 1)) {
2058 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2059 ((a->u128 != 0) || (b->u128 != 0));
2060 }
2061 r->u128 = carry_out;
2062 #else
2063
2064 int carry_in = c->VsrD(1) & 1;
2065 int carry_out = 0;
2066 ppc_avr_t tmp;
2067
2068 carry_out = avr_qw_addc(&tmp, *a, *b);
2069
2070 if (!carry_out && carry_in) {
2071 ppc_avr_t one = QW_ONE;
2072 carry_out = avr_qw_addc(&tmp, tmp, one);
2073 }
2074 r->VsrD(0) = 0;
2075 r->VsrD(1) = carry_out;
2076 #endif
2077 }
2078
2079 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2080 {
2081 #ifdef CONFIG_INT128
2082 r->u128 = a->u128 - b->u128;
2083 #else
2084 ppc_avr_t tmp;
2085 ppc_avr_t one = QW_ONE;
2086
2087 avr_qw_not(&tmp, *b);
2088 avr_qw_add(&tmp, *a, tmp);
2089 avr_qw_add(r, tmp, one);
2090 #endif
2091 }
2092
2093 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2094 {
2095 #ifdef CONFIG_INT128
2096 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2097 #else
2098 ppc_avr_t tmp, sum;
2099
2100 avr_qw_not(&tmp, *b);
2101 avr_qw_add(&sum, *a, tmp);
2102
2103 tmp.VsrD(0) = 0;
2104 tmp.VsrD(1) = c->VsrD(1) & 1;
2105 avr_qw_add(r, sum, tmp);
2106 #endif
2107 }
2108
2109 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2110 {
2111 #ifdef CONFIG_INT128
2112 r->u128 = (~a->u128 < ~b->u128) ||
2113 (a->u128 + ~b->u128 == (__uint128_t)-1);
2114 #else
2115 int carry = (avr_qw_cmpu(*a, *b) > 0);
2116 if (!carry) {
2117 ppc_avr_t tmp;
2118 avr_qw_not(&tmp, *b);
2119 avr_qw_add(&tmp, *a, tmp);
2120 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2121 }
2122 r->VsrD(0) = 0;
2123 r->VsrD(1) = carry;
2124 #endif
2125 }
2126
2127 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2128 {
2129 #ifdef CONFIG_INT128
2130 r->u128 =
2131 (~a->u128 < ~b->u128) ||
2132 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2133 #else
2134 int carry_in = c->VsrD(1) & 1;
2135 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2136 if (!carry_out && carry_in) {
2137 ppc_avr_t tmp;
2138 avr_qw_not(&tmp, *b);
2139 avr_qw_add(&tmp, *a, tmp);
2140 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2141 }
2142
2143 r->VsrD(0) = 0;
2144 r->VsrD(1) = carry_out;
2145 #endif
2146 }
2147
2148 #define BCD_PLUS_PREF_1 0xC
2149 #define BCD_PLUS_PREF_2 0xF
2150 #define BCD_PLUS_ALT_1 0xA
2151 #define BCD_NEG_PREF 0xD
2152 #define BCD_NEG_ALT 0xB
2153 #define BCD_PLUS_ALT_2 0xE
2154 #define NATIONAL_PLUS 0x2B
2155 #define NATIONAL_NEG 0x2D
2156
2157 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
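/*
 * BCD layout: digit 0 is the sign nibble, stored in the low nibble of the
 * least significant byte; digits 1..31 follow, two per byte, toward the
 * most significant byte.
 */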
2158
2159 static int bcd_get_sgn(ppc_avr_t *bcd)
2160 {
2161 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2162 case BCD_PLUS_PREF_1:
2163 case BCD_PLUS_PREF_2:
2164 case BCD_PLUS_ALT_1:
2165 case BCD_PLUS_ALT_2:
2166 {
2167 return 1;
2168 }
2169
2170 case BCD_NEG_PREF:
2171 case BCD_NEG_ALT:
2172 {
2173 return -1;
2174 }
2175
2176 default:
2177 {
2178 return 0;
2179 }
2180 }
2181 }
2182
2183 static int bcd_preferred_sgn(int sgn, int ps)
2184 {
2185 if (sgn >= 0) {
2186 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2187 } else {
2188 return BCD_NEG_PREF;
2189 }
2190 }
2191
2192 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2193 {
2194 uint8_t result;
2195 if (n & 1) {
2196 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2197 } else {
2198 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2199 }
2200
2201 if (unlikely(result > 9)) {
2202 *invalid = true;
2203 }
2204 return result;
2205 }
2206
2207 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2208 {
2209 if (n & 1) {
2210 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2211 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2212 } else {
2213 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2214 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2215 }
2216 }
2217
2218 static bool bcd_is_valid(ppc_avr_t *bcd)
2219 {
2220 int i;
2221 int invalid = 0;
2222
2223 if (bcd_get_sgn(bcd) == 0) {
2224 return false;
2225 }
2226
2227 for (i = 1; i < 32; i++) {
2228 bcd_get_digit(bcd, i, &invalid);
2229 if (unlikely(invalid)) {
2230 return false;
2231 }
2232 }
2233 return true;
2234 }
2235
2236 static int bcd_cmp_zero(ppc_avr_t *bcd)
2237 {
2238 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2239 return CRF_EQ;
2240 } else {
2241 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2242 }
2243 }
2244
2245 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2246 {
2247 return reg->VsrH(7 - n);
2248 }
2249
2250 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2251 {
2252 reg->VsrH(7 - n) = val;
2253 }
2254
2255 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2256 {
2257 int i;
2258 int invalid = 0;
2259 for (i = 31; i > 0; i--) {
2260 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2261 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2262 if (unlikely(invalid)) {
2263 return 0; /* doesn't matter */
2264 } else if (dig_a > dig_b) {
2265 return 1;
2266 } else if (dig_a < dig_b) {
2267 return -1;
2268 }
2269 }
2270
2271 return 0;
2272 }
2273
2274 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2275 int *overflow)
2276 {
2277 int carry = 0;
2278 int i;
2279 int is_zero = 1;
2280
2281 for (i = 1; i <= 31; i++) {
2282 uint8_t digit = bcd_get_digit(a, i, invalid) +
2283 bcd_get_digit(b, i, invalid) + carry;
2284 is_zero &= (digit == 0);
2285 if (digit > 9) {
2286 carry = 1;
2287 digit -= 10;
2288 } else {
2289 carry = 0;
2290 }
2291
2292 bcd_put_digit(t, digit, i);
2293 }
2294
2295 *overflow = carry;
2296 return is_zero;
2297 }
2298
2299 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2300 int *overflow)
2301 {
2302 int carry = 0;
2303 int i;
2304
2305 for (i = 1; i <= 31; i++) {
2306 uint8_t digit = bcd_get_digit(a, i, invalid) -
2307 bcd_get_digit(b, i, invalid) + carry;
2308 if (digit & 0x80) {
2309 carry = -1;
2310 digit += 10;
2311 } else {
2312 carry = 0;
2313 }
2314
2315 bcd_put_digit(t, digit, i);
2316 }
2317
2318 *overflow = carry;
2319 }
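
/*
 * Editorial note (not part of the upstream source): bcd_add_mag() and
 * bcd_sub_mag() work digit-serially from the least significant digit
 * (index 1) upward, like pencil-and-paper decimal arithmetic.  For example,
 * adding the digit pair 7 + 5 with no incoming carry gives 12, stored as
 * digit 2 with a carry of 1 into the next position; subtracting 3 - 8
 * underflows (the 0x80 test above), so 10 is added back and a borrow of -1
 * is carried into the next digit.  The final carry or borrow is reported
 * through *overflow.
 */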
2320
2321 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2322 {
2323
2324 int sgna = bcd_get_sgn(a);
2325 int sgnb = bcd_get_sgn(b);
2326 int invalid = (sgna == 0) || (sgnb == 0);
2327 int overflow = 0;
2328 int zero = 0;
2329 uint32_t cr = 0;
2330 ppc_avr_t result = { .u64 = { 0, 0 } };
2331
2332 if (!invalid) {
2333 if (sgna == sgnb) {
2334 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2335 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2336 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2337 } else {
2338 int magnitude = bcd_cmp_mag(a, b);
2339 if (magnitude > 0) {
2340 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2341 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2342 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2343 } else if (magnitude < 0) {
2344 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2345 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2346 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2347 } else {
2348 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2349 cr = CRF_EQ;
2350 }
2351 }
2352 }
2353
2354 if (unlikely(invalid)) {
2355 result.VsrD(0) = result.VsrD(1) = -1;
2356 cr = CRF_SO;
2357 } else if (overflow) {
2358 cr |= CRF_SO;
2359 } else if (zero) {
2360 cr |= CRF_EQ;
2361 }
2362
2363 *r = result;
2364
2365 return cr;
2366 }
2367
2368 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2369 {
2370 ppc_avr_t bcopy = *b;
2371 int sgnb = bcd_get_sgn(b);
2372 if (sgnb < 0) {
2373 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2374 } else if (sgnb > 0) {
2375 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2376 }
2377 /* else invalid ... defer to bcdadd code for proper handling */
2378
2379 return helper_bcdadd(r, a, &bcopy, ps);
2380 }
2381
2382 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2383 {
2384 int i;
2385 int cr = 0;
2386 uint16_t national = 0;
2387 uint16_t sgnb = get_national_digit(b, 0);
2388 ppc_avr_t ret = { .u64 = { 0, 0 } };
2389 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2390
2391 for (i = 1; i < 8; i++) {
2392 national = get_national_digit(b, i);
2393 if (unlikely(national < 0x30 || national > 0x39)) {
2394 invalid = 1;
2395 break;
2396 }
2397
2398 bcd_put_digit(&ret, national & 0xf, i);
2399 }
2400
2401 if (sgnb == NATIONAL_PLUS) {
2402 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2403 } else {
2404 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2405 }
2406
2407 cr = bcd_cmp_zero(&ret);
2408
2409 if (unlikely(invalid)) {
2410 cr = CRF_SO;
2411 }
2412
2413 *r = ret;
2414
2415 return cr;
2416 }
2417
2418 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2419 {
2420 int i;
2421 int cr = 0;
2422 int sgnb = bcd_get_sgn(b);
2423 int invalid = (sgnb == 0);
2424 ppc_avr_t ret = { .u64 = { 0, 0 } };
2425
2426 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2427
2428 for (i = 1; i < 8; i++) {
2429 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2430
2431 if (unlikely(invalid)) {
2432 break;
2433 }
2434 }
2435 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2436
2437 cr = bcd_cmp_zero(b);
2438
2439 if (ox_flag) {
2440 cr |= CRF_SO;
2441 }
2442
2443 if (unlikely(invalid)) {
2444 cr = CRF_SO;
2445 }
2446
2447 *r = ret;
2448
2449 return cr;
2450 }
2451
2452 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2453 {
2454 int i;
2455 int cr = 0;
2456 int invalid = 0;
2457 int zone_digit = 0;
2458 int zone_lead = ps ? 0xF : 0x3;
2459 int digit = 0;
2460 ppc_avr_t ret = { .u64 = { 0, 0 } };
2461 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2462
2463 if (unlikely((sgnb < 0xA) && ps)) {
2464 invalid = 1;
2465 }
2466
2467 for (i = 0; i < 16; i++) {
2468 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2469 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2470 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2471 invalid = 1;
2472 break;
2473 }
2474
2475 bcd_put_digit(&ret, digit, i + 1);
2476 }
2477
2478 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2479 (!ps && (sgnb & 0x4))) {
2480 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2481 } else {
2482 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2483 }
2484
2485 cr = bcd_cmp_zero(&ret);
2486
2487 if (unlikely(invalid)) {
2488 cr = CRF_SO;
2489 }
2490
2491 *r = ret;
2492
2493 return cr;
2494 }
2495
2496 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2497 {
2498 int i;
2499 int cr = 0;
2500 uint8_t digit = 0;
2501 int sgnb = bcd_get_sgn(b);
2502 int zone_lead = (ps) ? 0xF0 : 0x30;
2503 int invalid = (sgnb == 0);
2504 ppc_avr_t ret = { .u64 = { 0, 0 } };
2505
2506 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2507
2508 for (i = 0; i < 16; i++) {
2509 digit = bcd_get_digit(b, i + 1, &invalid);
2510
2511 if (unlikely(invalid)) {
2512 break;
2513 }
2514
2515 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2516 }
2517
2518 if (ps) {
2519 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2520 } else {
2521 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2522 }
2523
2524 cr = bcd_cmp_zero(b);
2525
2526 if (ox_flag) {
2527 cr |= CRF_SO;
2528 }
2529
2530 if (unlikely(invalid)) {
2531 cr = CRF_SO;
2532 }
2533
2534 *r = ret;
2535
2536 return cr;
2537 }
2538
2539 /**
2540 * Compare two 128-bit unsigned integers, each passed as a pair of 64-bit halves
2541 *
2542 * Returns:
2543 * > 0 if ahi|alo > bhi|blo,
2544 * 0 if ahi|alo == bhi|blo,
2545 * < 0 if ahi|alo < bhi|blo
2546 */
2547 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2548 uint64_t blo, uint64_t bhi)
2549 {
2550 return (ahi == bhi) ?
2551 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2552 (ahi > bhi ? 1 : -1);
2553 }
2554
2555 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2556 {
2557 int i;
2558 int cr;
2559 uint64_t lo_value;
2560 uint64_t hi_value;
2561 uint64_t rem;
2562 ppc_avr_t ret = { .u64 = { 0, 0 } };
2563
2564 if (b->VsrSD(0) < 0) {
2565 lo_value = -b->VsrSD(1);
2566 hi_value = ~b->VsrD(0) + !lo_value;
2567 bcd_put_digit(&ret, 0xD, 0);
2568
2569 cr = CRF_LT;
2570 } else {
2571 lo_value = b->VsrD(1);
2572 hi_value = b->VsrD(0);
2573 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2574
2575 if (hi_value == 0 && lo_value == 0) {
2576 cr = CRF_EQ;
2577 } else {
2578 cr = CRF_GT;
2579 }
2580 }
2581
2582 /*
2583 * Check src limits: abs(src) <= 10^31 - 1
2584 *
2585 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2586 */
2587 if (ucmp128(lo_value, hi_value,
2588 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2589 cr |= CRF_SO;
2590
2591 /*
2592 * According to the ISA, if src wouldn't fit in the destination
2593 * register, the result is undefined.
2594 * In that case, we leave r unchanged.
2595 */
2596 } else {
2597 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2598
2599 for (i = 1; i < 16; rem /= 10, i++) {
2600 bcd_put_digit(&ret, rem % 10, i);
2601 }
2602
2603 for (; i < 32; lo_value /= 10, i++) {
2604 bcd_put_digit(&ret, lo_value % 10, i);
2605 }
2606
2607 *r = ret;
2608 }
2609
2610 return cr;
2611 }
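
/*
 * Editorial note (not part of the upstream source): the conversion above
 * splits the 128-bit binary magnitude with a single divu128() by 10^15.
 * The remainder (< 10^15) supplies packed digits 1..15 and the quotient
 * (< 10^16, thanks to the 10^31 - 1 range check) supplies digits 16..31,
 * so each part fits in one 64-bit value and can be peeled off with plain
 * "% 10" and "/ 10" loops.
 */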
2612
2613 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2614 {
2615 uint8_t i;
2616 int cr;
2617 uint64_t carry;
2618 uint64_t unused;
2619 uint64_t lo_value;
2620 uint64_t hi_value = 0;
2621 int sgnb = bcd_get_sgn(b);
2622 int invalid = (sgnb == 0);
2623
2624 lo_value = bcd_get_digit(b, 31, &invalid);
2625 for (i = 30; i > 0; i--) {
2626 mulu64(&lo_value, &carry, lo_value, 10ULL);
2627 mulu64(&hi_value, &unused, hi_value, 10ULL);
2628 lo_value += bcd_get_digit(b, i, &invalid);
2629 hi_value += carry;
2630
2631 if (unlikely(invalid)) {
2632 break;
2633 }
2634 }
2635
2636 if (sgnb == -1) {
2637 r->VsrSD(1) = -lo_value;
2638 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2639 } else {
2640 r->VsrSD(1) = lo_value;
2641 r->VsrSD(0) = hi_value;
2642 }
2643
2644 cr = bcd_cmp_zero(b);
2645
2646 if (unlikely(invalid)) {
2647 cr = CRF_SO;
2648 }
2649
2650 return cr;
2651 }
2652
2653 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2654 {
2655 int i;
2656 int invalid = 0;
2657
2658 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2659 return CRF_SO;
2660 }
2661
2662 *r = *a;
2663 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2664
2665 for (i = 1; i < 32; i++) {
2666 bcd_get_digit(a, i, &invalid);
2667 bcd_get_digit(b, i, &invalid);
2668 if (unlikely(invalid)) {
2669 return CRF_SO;
2670 }
2671 }
2672
2673 return bcd_cmp_zero(r);
2674 }
2675
2676 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2677 {
2678 int sgnb = bcd_get_sgn(b);
2679
2680 *r = *b;
2681 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2682
2683 if (bcd_is_valid(b) == false) {
2684 return CRF_SO;
2685 }
2686
2687 return bcd_cmp_zero(r);
2688 }
2689
2690 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2691 {
2692 int cr;
2693 int i = a->VsrSB(7);
2694 bool ox_flag = false;
2695 int sgnb = bcd_get_sgn(b);
2696 ppc_avr_t ret = *b;
2697 ret.VsrD(1) &= ~0xf;
2698
2699 if (bcd_is_valid(b) == false) {
2700 return CRF_SO;
2701 }
2702
2703 if (unlikely(i > 31)) {
2704 i = 31;
2705 } else if (unlikely(i < -31)) {
2706 i = -31;
2707 }
2708
2709 if (i > 0) {
2710 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2711 } else {
2712 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2713 }
2714 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2715
2716 *r = ret;
2717
2718 cr = bcd_cmp_zero(r);
2719 if (ox_flag) {
2720 cr |= CRF_SO;
2721 }
2722
2723 return cr;
2724 }
2725
2726 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2727 {
2728 int cr;
2729 int i;
2730 int invalid = 0;
2731 bool ox_flag = false;
2732 ppc_avr_t ret = *b;
2733
2734 for (i = 0; i < 32; i++) {
2735 bcd_get_digit(b, i, &invalid);
2736
2737 if (unlikely(invalid)) {
2738 return CRF_SO;
2739 }
2740 }
2741
2742 i = a->VsrSB(7);
2743 if (i >= 32) {
2744 ox_flag = true;
2745 ret.VsrD(1) = ret.VsrD(0) = 0;
2746 } else if (i <= -32) {
2747 ret.VsrD(1) = ret.VsrD(0) = 0;
2748 } else if (i > 0) {
2749 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2750 } else {
2751 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2752 }
2753 *r = ret;
2754
2755 cr = bcd_cmp_zero(r);
2756 if (ox_flag) {
2757 cr |= CRF_SO;
2758 }
2759
2760 return cr;
2761 }
2762
2763 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2764 {
2765 int cr;
2766 int unused = 0;
2767 int invalid = 0;
2768 bool ox_flag = false;
2769 int sgnb = bcd_get_sgn(b);
2770 ppc_avr_t ret = *b;
2771 ret.VsrD(1) &= ~0xf;
2772
2773 int i = a->VsrSB(7);
2774 ppc_avr_t bcd_one;
2775
2776 bcd_one.VsrD(0) = 0;
2777 bcd_one.VsrD(1) = 0x10;
2778
2779 if (bcd_is_valid(b) == false) {
2780 return CRF_SO;
2781 }
2782
2783 if (unlikely(i > 31)) {
2784 i = 31;
2785 } else if (unlikely(i < -31)) {
2786 i = -31;
2787 }
2788
2789 if (i > 0) {
2790 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2791 } else {
2792 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2793
2794 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2795 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2796 }
2797 }
2798 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2799
2800 cr = bcd_cmp_zero(&ret);
2801 if (ox_flag) {
2802 cr |= CRF_SO;
2803 }
2804 *r = ret;
2805
2806 return cr;
2807 }
2808
2809 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2810 {
2811 uint64_t mask;
2812 uint32_t ox_flag = 0;
2813 int i = a->VsrSH(3) + 1;
2814 ppc_avr_t ret = *b;
2815
2816 if (bcd_is_valid(b) == false) {
2817 return CRF_SO;
2818 }
2819
2820 if (i > 16 && i < 32) {
2821 mask = (uint64_t)-1 >> (128 - i * 4);
2822 if (ret.VsrD(0) & ~mask) {
2823 ox_flag = CRF_SO;
2824 }
2825
2826 ret.VsrD(0) &= mask;
2827 } else if (i >= 0 && i <= 16) {
2828 mask = (uint64_t)-1 >> (64 - i * 4);
2829 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2830 ox_flag = CRF_SO;
2831 }
2832
2833 ret.VsrD(1) &= mask;
2834 ret.VsrD(0) = 0;
2835 }
2836 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2837 *r = ret;
2838
2839 return bcd_cmp_zero(&ret) | ox_flag;
2840 }
2841
2842 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2843 {
2844 int i;
2845 uint64_t mask;
2846 uint32_t ox_flag = 0;
2847 int invalid = 0;
2848 ppc_avr_t ret = *b;
2849
2850 for (i = 0; i < 32; i++) {
2851 bcd_get_digit(b, i, &invalid);
2852
2853 if (unlikely(invalid)) {
2854 return CRF_SO;
2855 }
2856 }
2857
2858 i = a->VsrSH(3);
2859 if (i > 16 && i < 33) {
2860 mask = (uint64_t)-1 >> (128 - i * 4);
2861 if (ret.VsrD(0) & ~mask) {
2862 ox_flag = CRF_SO;
2863 }
2864
2865 ret.VsrD(0) &= mask;
2866 } else if (i > 0 && i <= 16) {
2867 mask = (uint64_t)-1 >> (64 - i * 4);
2868 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2869 ox_flag = CRF_SO;
2870 }
2871
2872 ret.VsrD(1) &= mask;
2873 ret.VsrD(0) = 0;
2874 } else if (i == 0) {
2875 if (ret.VsrD(0) || ret.VsrD(1)) {
2876 ox_flag = CRF_SO;
2877 }
2878 ret.VsrD(0) = ret.VsrD(1) = 0;
2879 }
2880
2881 *r = ret;
2882 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2883 return ox_flag | CRF_EQ;
2884 }
2885
2886 return ox_flag | CRF_GT;
2887 }
2888
2889 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2890 {
2891 int i;
2892 VECTOR_FOR_INORDER_I(i, u8) {
2893 r->u8[i] = AES_sbox[a->u8[i]];
2894 }
2895 }
2896
2897 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2898 {
2899 ppc_avr_t result;
2900 int i;
2901
2902 VECTOR_FOR_INORDER_I(i, u32) {
2903 result.VsrW(i) = b->VsrW(i) ^
2904 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2905 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2906 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2907 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2908 }
2909 *r = result;
2910 }
2911
2912 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2913 {
2914 ppc_avr_t result;
2915 int i;
2916
2917 VECTOR_FOR_INORDER_I(i, u8) {
2918 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2919 }
2920 *r = result;
2921 }
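
/*
 * Editorial note (not part of the upstream source): vcipher above is one
 * AES encryption round in the usual T-table formulation -- the AES_Te0..3
 * tables combine SubBytes with MixColumns, indexing through AES_shifts
 * applies ShiftRows, and the final XOR with b adds the round key.
 * vcipherlast is the last round, which omits MixColumns and therefore only
 * needs the plain S-box.  vncipher/vncipherlast below mirror this for
 * decryption using the inverse tables.
 */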
2922
2923 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2924 {
2925 /* This differs from what is written in ISA V2.07. The RTL is */
2926 /* incorrect and will be fixed in V2.07B. */
2927 int i;
2928 ppc_avr_t tmp;
2929
2930 VECTOR_FOR_INORDER_I(i, u8) {
2931 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2932 }
2933
2934 VECTOR_FOR_INORDER_I(i, u32) {
2935 r->VsrW(i) =
2936 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2937 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2938 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2939 AES_imc[tmp.VsrB(4 * i + 3)][3];
2940 }
2941 }
2942
2943 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2944 {
2945 ppc_avr_t result;
2946 int i;
2947
2948 VECTOR_FOR_INORDER_I(i, u8) {
2949 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2950 }
2951 *r = result;
2952 }
2953
2954 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2955 {
2956 int st = (st_six & 0x10) != 0;
2957 int six = st_six & 0xF;
2958 int i;
2959
2960 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2961 if (st == 0) {
2962 if ((six & (0x8 >> i)) == 0) {
2963 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2964 ror32(a->VsrW(i), 18) ^
2965 (a->VsrW(i) >> 3);
2966 } else { /* six.bit[i] == 1 */
2967 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2968 ror32(a->VsrW(i), 19) ^
2969 (a->VsrW(i) >> 10);
2970 }
2971 } else { /* st == 1 */
2972 if ((six & (0x8 >> i)) == 0) {
2973 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2974 ror32(a->VsrW(i), 13) ^
2975 ror32(a->VsrW(i), 22);
2976 } else { /* six.bit[i] == 1 */
2977 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2978 ror32(a->VsrW(i), 11) ^
2979 ror32(a->VsrW(i), 25);
2980 }
2981 }
2982 }
2983 }
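
/*
 * Editorial note (not part of the upstream source): the four cases above
 * are the SHA-256 sigma functions of FIPS 180-4, selected per word by the
 * st bit and the corresponding six bit:
 *
 *     st = 0, six bit clear:  sigma0(x) = ROTR7(x)  ^ ROTR18(x) ^ SHR3(x)
 *     st = 0, six bit set:    sigma1(x) = ROTR17(x) ^ ROTR19(x) ^ SHR10(x)
 *     st = 1, six bit clear:  Sigma0(x) = ROTR2(x)  ^ ROTR13(x) ^ ROTR22(x)
 *     st = 1, six bit set:    Sigma1(x) = ROTR6(x)  ^ ROTR11(x) ^ ROTR25(x)
 */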
2984
2985 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2986 {
2987 int st = (st_six & 0x10) != 0;
2988 int six = st_six & 0xF;
2989 int i;
2990
2991 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2992 if (st == 0) {
2993 if ((six & (0x8 >> (2 * i))) == 0) {
2994 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2995 ror64(a->VsrD(i), 8) ^
2996 (a->VsrD(i) >> 7);
2997 } else { /* six.bit[2*i] == 1 */
2998 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2999 ror64(a->VsrD(i), 61) ^
3000 (a->VsrD(i) >> 6);
3001 }
3002 } else { /* st == 1 */
3003 if ((six & (0x8 >> (2 * i))) == 0) {
3004 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3005 ror64(a->VsrD(i), 34) ^
3006 ror64(a->VsrD(i), 39);
3007 } else { /* six.bit[2*i] == 1 */
3008 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3009 ror64(a->VsrD(i), 18) ^
3010 ror64(a->VsrD(i), 41);
3011 }
3012 }
3013 }
3014 }
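
/*
 * Editorial note (not part of the upstream source): likewise, the
 * doubleword variant above computes the SHA-512 sigma functions of
 * FIPS 180-4:
 *
 *     st = 0:  sigma0(x) = ROTR1(x)  ^ ROTR8(x)  ^ SHR7(x)
 *              sigma1(x) = ROTR19(x) ^ ROTR61(x) ^ SHR6(x)
 *     st = 1:  Sigma0(x) = ROTR28(x) ^ ROTR34(x) ^ ROTR39(x)
 *              Sigma1(x) = ROTR14(x) ^ ROTR18(x) ^ ROTR41(x)
 */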
3015
3016 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3017 {
3018 ppc_avr_t result;
3019 int i;
3020
3021 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3022 int indexA = c->VsrB(i) >> 4;
3023 int indexB = c->VsrB(i) & 0xF;
3024
3025 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3026 }
3027 *r = result;
3028 }
3029
3030 #undef VECTOR_FOR_INORDER_I
3031
3032 /*****************************************************************************/
3033 /* SPE extension helpers */
3034 /* Use a lookup table of reversed nibbles to make this quicker */
3035 static const uint8_t hbrev[16] = {
3036 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3037 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3038 };
3039
3040 static inline uint8_t byte_reverse(uint8_t val)
3041 {
3042 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3043 }
3044
3045 static inline uint32_t word_reverse(uint32_t val)
3046 {
3047 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3048 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3049 }
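
/*
 * Editorial note (not part of the upstream source): hbrev[] holds the bit
 * reversal of every 4-bit value, so a byte is reversed by swapping its two
 * nibbles and looking each one up.  Worked example:
 *
 *     byte_reverse(0x1A) = hbrev[0x1] | (hbrev[0xA] << 4)
 *                        = 0x08 | 0x50
 *                        = 0x58            (00011010 -> 01011000)
 */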
3050
3051 #define MASKBITS 16 /* Arbitrary value, to be fixed (the real width is implementation dependent) */
3052 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3053 {
3054 uint32_t a, b, d, mask;
3055
3056 mask = UINT32_MAX >> (32 - MASKBITS);
3057 a = arg1 & mask;
3058 b = arg2 & mask;
3059 d = word_reverse(1 + word_reverse(a | ~b));
3060 return (arg1 & ~mask) | (d & b);
3061 }
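
/*
 * Editorial note (not part of the upstream source): brinc is SPE's
 * bit-reversed increment.  The bits of arg1 selected by arg2 within the low
 * MASKBITS bits are treated as a bit-reversed counter: reversing them,
 * adding 1 and reversing back (the word_reverse(1 + word_reverse(...))
 * expression above) adds one starting from the most significant selected
 * bit, which is how code typically steps through buffers stored in
 * bit-reversed (FFT) order.  The "a | ~b" term forces the unselected
 * positions to 1 so that the carry ripples straight across them.
 */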
3062
3063 uint32_t helper_cntlsw32(uint32_t val)
3064 {
3065 if (val & 0x80000000) {
3066 return clz32(~val);
3067 } else {
3068 return clz32(val);
3069 }
3070 }
3071
3072 uint32_t helper_cntlzw32(uint32_t val)
3073 {
3074 return clz32(val);
3075 }
3076
3077 /* 440 specific */
3078 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3079 target_ulong low, uint32_t update_Rc)
3080 {
3081 target_ulong mask;
3082 int i;
3083
3084 i = 1;
3085 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3086 if ((high & mask) == 0) {
3087 if (update_Rc) {
3088 env->crf[0] = 0x4;
3089 }
3090 goto done;
3091 }
3092 i++;
3093 }
3094 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3095 if ((low & mask) == 0) {
3096 if (update_Rc) {
3097 env->crf[0] = 0x8;
3098 }
3099 goto done;
3100 }
3101 i++;
3102 }
3103 i = 8;
3104 if (update_Rc) {
3105 env->crf[0] = 0x2;
3106 }
3107 done:
3108 env->xer = (env->xer & ~0x7F) | i;
3109 if (update_Rc) {
3110 env->crf[0] |= xer_so;
3111 }
3112 return i;
3113 }
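
/*
 * Editorial note (not part of the upstream source): dlmzb scans the 8-byte
 * string formed by high:low from the most significant byte down, returning
 * the byte count up to and including the first zero byte (capped at 8 when
 * no zero byte is present) and mirroring it into the low 7 bits of XER.
 * With update_Rc set, CR0 becomes 0x4 if the zero byte was in high, 0x8 if
 * it was in low and 0x2 if none was found, ORed with XER[SO].  For example,
 * high = 0x41424300 ("ABC\0") makes the zero the fourth byte scanned, so
 * the helper returns 4.
 */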