1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "internal.h"
22 #include "exec/exec-all.h"
23 #include "qemu/host-utils.h"
24 #include "exec/helper-proto.h"
25 #include "crypto/aes.h"
26
27 #include "helper_regs.h"
28 /*****************************************************************************/
29 /* Fixed point operations helpers */
30
31 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
32 uint32_t oe)
33 {
34 uint64_t rt = 0;
35 int overflow = 0;
36
37 uint64_t dividend = (uint64_t)ra << 32;
38 uint64_t divisor = (uint32_t)rb;
39
40 if (unlikely(divisor == 0)) {
41 overflow = 1;
42 } else {
43 rt = dividend / divisor;
44 overflow = rt > UINT32_MAX;
45 }
46
47 if (unlikely(overflow)) {
48 rt = 0; /* Undefined */
49 }
50
51 if (oe) {
52 if (unlikely(overflow)) {
53 env->so = env->ov = 1;
54 } else {
55 env->ov = 0;
56 }
57 }
58
59 return (target_ulong)rt;
60 }
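/* Illustrative example for divweu (values chosen only to show the overflow
 * rule): with ra = 1, rb = 2, the dividend is 1 << 32 = 0x100000000 and
 * rt = 0x80000000, which fits in 32 bits, so no overflow is flagged; with
 * ra = 1, rb = 1 the quotient 0x100000000 exceeds UINT32_MAX, so rt is
 * treated as undefined (forced to 0) and OV/SO are set when oe is non-zero.
 */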
61
62 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
63 uint32_t oe)
64 {
65 int64_t rt = 0;
66 int overflow = 0;
67
68 int64_t dividend = (int64_t)ra << 32;
69 int64_t divisor = (int64_t)((int32_t)rb);
70
71 if (unlikely((divisor == 0) ||
72 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
73 overflow = 1;
74 } else {
75 rt = dividend / divisor;
76 overflow = rt != (int32_t)rt;
77 }
78
79 if (unlikely(overflow)) {
80 rt = 0; /* Undefined */
81 }
82
83 if (oe) {
84 if (unlikely(overflow)) {
85 env->so = env->ov = 1;
86 } else {
87 env->ov = 0;
88 }
89 }
90
91 return (target_ulong)rt;
92 }
93
94 #if defined(TARGET_PPC64)
95
96 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97 {
98 uint64_t rt = 0;
99 int overflow = 0;
100
101 overflow = divu128(&rt, &ra, rb);
102
103 if (unlikely(overflow)) {
104 rt = 0; /* Undefined */
105 }
106
107 if (oe) {
108 if (unlikely(overflow)) {
109 env->so = env->ov = 1;
110 } else {
111 env->ov = 0;
112 }
113 }
114
115 return rt;
116 }
117
118 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 {
120 int64_t rt = 0;
121 int64_t ra = (int64_t)rau;
122 int64_t rb = (int64_t)rbu;
123 int overflow = divs128(&rt, &ra, rb);
124
125 if (unlikely(overflow)) {
126 rt = 0; /* Undefined */
127 }
128
129 if (oe) {
130
131 if (unlikely(overflow)) {
132 env->so = env->ov = 1;
133 } else {
134 env->ov = 0;
135 }
136 }
137
138 return rt;
139 }
140
141 #endif
142
143
144 #if defined(TARGET_PPC64)
145 /* if x = 0xab, returns 0xabababababababab */
146 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
147
148 /* Subtract 1 from each byte, AND with the inverse, and check whether the MSB
149  * is set in each byte,
150  * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
151  *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
152  */
153 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
154
155 /* When you XOR the pattern and there is a match, that byte will be zero */
156 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
157
158 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
159 {
160 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
161 }
162
163 #undef pattern
164 #undef haszero
165 #undef hasvalue
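/* Worked example of the byte-search trick above (illustrative values):
 * pattern(0x01) = 0x0101010101010101 and pattern(0x80) = 0x8080808080808080.
 * For cmpeqb with ra = 0x42 and rb = 0x0000004200001234,
 * rb ^ pattern(0x42) = 0x4242420042425076 contains a zero byte, so
 * haszero() is non-zero and the helper returns 1 << CRF_GT; with
 * rb = 0x1111111111111111 no byte equals 0x42 and the result is 0.
 */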
166
167 /* Return an invalid random number.
168  *
169  * FIXME: Add an RNG backend or another mechanism to get a cryptographically
170  * suitable random number.
171  */
172 target_ulong helper_darn32(void)
173 {
174 return -1;
175 }
176
177 target_ulong helper_darn64(void)
178 {
179 return -1;
180 }
181
182 #endif
183
184 #if defined(TARGET_PPC64)
185
186 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
187 {
188 int i;
189 uint64_t ra = 0;
190
191 for (i = 0; i < 8; i++) {
192 int index = (rs >> (i*8)) & 0xFF;
193 if (index < 64) {
194 if (rb & (1ull << (63-index))) {
195 ra |= 1 << i;
196 }
197 }
198 }
199 return ra;
200 }
201
202 #endif
203
204 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
205 {
206 target_ulong mask = 0xff;
207 target_ulong ra = 0;
208 int i;
209
210 for (i = 0; i < sizeof(target_ulong); i++) {
211 if ((rs & mask) == (rb & mask)) {
212 ra |= mask;
213 }
214 mask <<= 8;
215 }
216 return ra;
217 }
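/* Illustrative example for cmpb (assuming a 64-bit target_ulong):
 * rs = 0x1122334455667788 and rb = 0x1100330055007700 agree in the bytes
 * holding 0x11, 0x33, 0x55 and 0x77, so ra = 0xff00ff00ff00ff00.
 */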
218
219 /* shift right arithmetic helper */
220 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
221 target_ulong shift)
222 {
223 int32_t ret;
224
225 if (likely(!(shift & 0x20))) {
226 if (likely((uint32_t)shift != 0)) {
227 shift &= 0x1f;
228 ret = (int32_t)value >> shift;
229 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
230 env->ca = 0;
231 } else {
232 env->ca = 1;
233 }
234 } else {
235 ret = (int32_t)value;
236 env->ca = 0;
237 }
238 } else {
239 ret = (int32_t)value >> 31;
240 env->ca = (ret != 0);
241 }
242 return (target_long)ret;
243 }
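/* Illustrative example for sraw: value = 0xfffffff5 (-11) shifted right by 2
 * gives ret = -3, and because a 1 bit (value & 0x3 = 1) was shifted out of a
 * negative result, CA is set; value = 12 with the same shift gives ret = 3
 * and CA = 0.
 */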
244
245 #if defined(TARGET_PPC64)
246 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
247 target_ulong shift)
248 {
249 int64_t ret;
250
251 if (likely(!(shift & 0x40))) {
252 if (likely((uint64_t)shift != 0)) {
253 shift &= 0x3f;
254 ret = (int64_t)value >> shift;
255 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
256 env->ca = 0;
257 } else {
258 env->ca = 1;
259 }
260 } else {
261 ret = (int64_t)value;
262 env->ca = 0;
263 }
264 } else {
265 ret = (int64_t)value >> 63;
266 env->ca = (ret != 0);
267 }
268 return ret;
269 }
270 #endif
271
272 #if defined(TARGET_PPC64)
273 target_ulong helper_popcntb(target_ulong val)
274 {
275 /* Note that we don't fold past bytes */
276 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
277 0x5555555555555555ULL);
278 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
279 0x3333333333333333ULL);
280 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
281 0x0f0f0f0f0f0f0f0fULL);
282 return val;
283 }
284
285 target_ulong helper_popcntw(target_ulong val)
286 {
287 /* Note that we don't fold past words. */
288 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
289 0x5555555555555555ULL);
290 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
291 0x3333333333333333ULL);
292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
293 0x0f0f0f0f0f0f0f0fULL);
294 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
295 0x00ff00ff00ff00ffULL);
296 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
297 0x0000ffff0000ffffULL);
298 return val;
299 }
300 #else
301 target_ulong helper_popcntb(target_ulong val)
302 {
303 /* Note that we don't fold past bytes */
304 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
305 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
306 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
307 return val;
308 }
309 #endif
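/* Illustrative example for the bit-folding above: popcntb(0xff01) counts ones
 * within each byte independently, giving 0x0801 (8 ones in the 0xff byte, 1 in
 * the 0x01 byte); popcntw additionally folds up to word boundaries, so
 * popcntw(0x00000000ffffffff) = 0x0000000000000020.
 */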
310
311 /*****************************************************************************/
312 /* PowerPC 601 specific instructions (POWER bridge) */
313 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
314 {
315 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
316
317 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
318 (int32_t)arg2 == 0) {
319 env->spr[SPR_MQ] = 0;
320 return INT32_MIN;
321 } else {
322 env->spr[SPR_MQ] = tmp % arg2;
323 return tmp / (int32_t)arg2;
324 }
325 }
326
327 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
328 target_ulong arg2)
329 {
330 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
331
332 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
333 (int32_t)arg2 == 0) {
334 env->so = env->ov = 1;
335 env->spr[SPR_MQ] = 0;
336 return INT32_MIN;
337 } else {
338 env->spr[SPR_MQ] = tmp % arg2;
339 tmp /= (int32_t)arg2;
340 if ((int32_t)tmp != tmp) {
341 env->so = env->ov = 1;
342 } else {
343 env->ov = 0;
344 }
345 return tmp;
346 }
347 }
348
349 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
350 target_ulong arg2)
351 {
352 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
353 (int32_t)arg2 == 0) {
354 env->spr[SPR_MQ] = 0;
355 return INT32_MIN;
356 } else {
357 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
358 return (int32_t)arg1 / (int32_t)arg2;
359 }
360 }
361
362 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
363 target_ulong arg2)
364 {
365 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
366 (int32_t)arg2 == 0) {
367 env->so = env->ov = 1;
368 env->spr[SPR_MQ] = 0;
369 return INT32_MIN;
370 } else {
371 env->ov = 0;
372 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
373 return (int32_t)arg1 / (int32_t)arg2;
374 }
375 }
376
377 /*****************************************************************************/
378 /* 602 specific instructions */
379 /* mfrom is the craziest instruction ever seen, imho! */
380 /* The real implementation uses a ROM table; do the same. */
381 /* Extremely decomposed:
382  *
383  * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
384  */
385 #if !defined(CONFIG_USER_ONLY)
386 target_ulong helper_602_mfrom(target_ulong arg)
387 {
388 if (likely(arg < 602)) {
389 #include "mfrom_table.c"
390 return mfrom_ROM_table[arg];
391 } else {
392 return 0;
393 }
394 }
395 #endif
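/* Illustrative evaluation of the formula above: for arg = 0 it reduces to
 * 256 * log10(10^0 + 1.0) + 0.5 = 256 * log10(2) + 0.5, roughly 77.6, which
 * truncates to the first table entry; larger arguments decay smoothly towards
 * 0 across the 602 entries of mfrom_ROM_table.
 */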
396
397 /*****************************************************************************/
398 /* Altivec extension helpers */
399 #if defined(HOST_WORDS_BIGENDIAN)
400 #define HI_IDX 0
401 #define LO_IDX 1
402 #define AVRB(i) u8[i]
403 #define AVRW(i) u32[i]
404 #else
405 #define HI_IDX 1
406 #define LO_IDX 0
407 #define AVRB(i) u8[15-(i)]
408 #define AVRW(i) u32[3-(i)]
409 #endif
410
411 #if defined(HOST_WORDS_BIGENDIAN)
412 #define VECTOR_FOR_INORDER_I(index, element) \
413 for (index = 0; index < ARRAY_SIZE(r->element); index++)
414 #else
415 #define VECTOR_FOR_INORDER_I(index, element) \
416 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
417 #endif
418
419 /* Saturating arithmetic helpers. */
420 #define SATCVT(from, to, from_type, to_type, min, max) \
421 static inline to_type cvt##from##to(from_type x, int *sat) \
422 { \
423 to_type r; \
424 \
425 if (x < (from_type)min) { \
426 r = min; \
427 *sat = 1; \
428 } else if (x > (from_type)max) { \
429 r = max; \
430 *sat = 1; \
431 } else { \
432 r = x; \
433 } \
434 return r; \
435 }
436 #define SATCVTU(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
438 { \
439 to_type r; \
440 \
441 if (x > (from_type)max) { \
442 r = max; \
443 *sat = 1; \
444 } else { \
445 r = x; \
446 } \
447 return r; \
448 }
449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
452
453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
459 #undef SATCVT
460 #undef SATCVTU
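/* Illustrative use of the converters above: cvtshsb(300, &sat) saturates to
 * INT8_MAX (127) and sets *sat; cvtshsb(-40, &sat) returns -40 unchanged;
 * cvtshub(-5, &sat) clamps a negative input to 0, because the signed variants
 * also check the lower bound.
 */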
461
462 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
463 {
464 int i, j = (sh & 0xf);
465
466 VECTOR_FOR_INORDER_I(i, u8) {
467 r->u8[i] = j++;
468 }
469 }
470
471 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
472 {
473 int i, j = 0x10 - (sh & 0xf);
474
475 VECTOR_FOR_INORDER_I(i, u8) {
476 r->u8[i] = j++;
477 }
478 }
479
480 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
481 {
482 #if defined(HOST_WORDS_BIGENDIAN)
483 env->vscr = r->u32[3];
484 #else
485 env->vscr = r->u32[0];
486 #endif
487 set_flush_to_zero(vscr_nj, &env->vec_status);
488 }
489
490 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
491 {
492 int i;
493
494 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
495 r->u32[i] = ~a->u32[i] < b->u32[i];
496 }
497 }
498
499 /* vprtybw */
500 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
501 {
502 int i;
503 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
504 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
505 res ^= res >> 8;
506 r->u32[i] = res & 1;
507 }
508 }
509
510 /* vprtybd */
511 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
512 {
513 int i;
514 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
515 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
516 res ^= res >> 16;
517 res ^= res >> 8;
518 r->u64[i] = res & 1;
519 }
520 }
521
522 /* vprtybq */
523 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
524 {
525 uint64_t res = b->u64[0] ^ b->u64[1];
526 res ^= res >> 32;
527 res ^= res >> 16;
528 res ^= res >> 8;
529 r->u64[LO_IDX] = res & 1;
530 r->u64[HI_IDX] = 0;
531 }
532
533 #define VARITH_DO(name, op, element) \
534 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
535 { \
536 int i; \
537 \
538 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
539 r->element[i] = a->element[i] op b->element[i]; \
540 } \
541 }
542 #define VARITH(suffix, element) \
543 VARITH_DO(add##suffix, +, element) \
544 VARITH_DO(sub##suffix, -, element)
545 VARITH(ubm, u8)
546 VARITH(uhm, u16)
547 VARITH(uwm, u32)
548 VARITH(udm, u64)
549 VARITH_DO(muluwm, *, u32)
550 #undef VARITH_DO
551 #undef VARITH
552
553 #define VARITHFP(suffix, func) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b) \
556 { \
557 int i; \
558 \
559 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
560 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
561 } \
562 }
563 VARITHFP(addfp, float32_add)
564 VARITHFP(subfp, float32_sub)
565 VARITHFP(minfp, float32_min)
566 VARITHFP(maxfp, float32_max)
567 #undef VARITHFP
568
569 #define VARITHFPFMA(suffix, type) \
570 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
571 ppc_avr_t *b, ppc_avr_t *c) \
572 { \
573 int i; \
574 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
575 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
576 type, &env->vec_status); \
577 } \
578 }
579 VARITHFPFMA(maddfp, 0);
580 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
581 #undef VARITHFPFMA
582
583 #define VARITHSAT_CASE(type, op, cvt, element) \
584 { \
585 type result = (type)a->element[i] op (type)b->element[i]; \
586 r->element[i] = cvt(result, &sat); \
587 }
588
589 #define VARITHSAT_DO(name, op, optype, cvt, element) \
590 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
591 ppc_avr_t *b) \
592 { \
593 int sat = 0; \
594 int i; \
595 \
596 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
597 switch (sizeof(r->element[0])) { \
598 case 1: \
599 VARITHSAT_CASE(optype, op, cvt, element); \
600 break; \
601 case 2: \
602 VARITHSAT_CASE(optype, op, cvt, element); \
603 break; \
604 case 4: \
605 VARITHSAT_CASE(optype, op, cvt, element); \
606 break; \
607 } \
608 } \
609 if (sat) { \
610 env->vscr |= (1 << VSCR_SAT); \
611 } \
612 }
613 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
614 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
615 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
616 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
617 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
618 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
619 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
620 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
621 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
622 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
623 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
624 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
625 #undef VARITHSAT_CASE
626 #undef VARITHSAT_DO
627 #undef VARITHSAT_SIGNED
628 #undef VARITHSAT_UNSIGNED
629
630 #define VAVG_DO(name, element, etype) \
631 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
632 { \
633 int i; \
634 \
635 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
636 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
637 r->element[i] = x >> 1; \
638 } \
639 }
640
641 #define VAVG(type, signed_element, signed_type, unsigned_element, \
642 unsigned_type) \
643 VAVG_DO(avgs##type, signed_element, signed_type) \
644 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
645 VAVG(b, s8, int16_t, u8, uint16_t)
646 VAVG(h, s16, int32_t, u16, uint32_t)
647 VAVG(w, s32, int64_t, u32, uint64_t)
648 #undef VAVG_DO
649 #undef VAVG
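/* Illustrative example of the rounding in VAVG: the "+ 1" rounds .5 results
 * up (toward +infinity), e.g. vavgub on 4 and 7 yields (4 + 7 + 1) >> 1 = 6,
 * and vavgsb on -3 and 2 yields 0 rather than -1.
 */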
650
651 #define VABSDU_DO(name, element) \
652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
653 { \
654 int i; \
655 \
656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
657 r->element[i] = (a->element[i] > b->element[i]) ? \
658 (a->element[i] - b->element[i]) : \
659 (b->element[i] - a->element[i]); \
660 } \
661 }
662
663 /* VABSDU - Vector absolute difference unsigned
664  * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
665  * element - element type to access from vector
666  */
667 #define VABSDU(type, element) \
668 VABSDU_DO(absdu##type, element)
669 VABSDU(b, u8)
670 VABSDU(h, u16)
671 VABSDU(w, u32)
672 #undef VABSDU_DO
673 #undef VABSDU
674
675 #define VCF(suffix, cvt, element) \
676 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
677 ppc_avr_t *b, uint32_t uim) \
678 { \
679 int i; \
680 \
681 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
682 float32 t = cvt(b->element[i], &env->vec_status); \
683 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
684 } \
685 }
686 VCF(ux, uint32_to_float32, u32)
687 VCF(sx, int32_to_float32, s32)
688 #undef VCF
689
690 #define VCMP_DO(suffix, compare, element, record) \
691 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
692 ppc_avr_t *a, ppc_avr_t *b) \
693 { \
694 uint64_t ones = (uint64_t)-1; \
695 uint64_t all = ones; \
696 uint64_t none = 0; \
697 int i; \
698 \
699 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
700 uint64_t result = (a->element[i] compare b->element[i] ? \
701 ones : 0x0); \
702 switch (sizeof(a->element[0])) { \
703 case 8: \
704 r->u64[i] = result; \
705 break; \
706 case 4: \
707 r->u32[i] = result; \
708 break; \
709 case 2: \
710 r->u16[i] = result; \
711 break; \
712 case 1: \
713 r->u8[i] = result; \
714 break; \
715 } \
716 all &= result; \
717 none |= result; \
718 } \
719 if (record) { \
720 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
721 } \
722 }
723 #define VCMP(suffix, compare, element) \
724 VCMP_DO(suffix, compare, element, 0) \
725 VCMP_DO(suffix##_dot, compare, element, 1)
726 VCMP(equb, ==, u8)
727 VCMP(equh, ==, u16)
728 VCMP(equw, ==, u32)
729 VCMP(equd, ==, u64)
730 VCMP(gtub, >, u8)
731 VCMP(gtuh, >, u16)
732 VCMP(gtuw, >, u32)
733 VCMP(gtud, >, u64)
734 VCMP(gtsb, >, s8)
735 VCMP(gtsh, >, s16)
736 VCMP(gtsw, >, s32)
737 VCMP(gtsd, >, s64)
738 #undef VCMP_DO
739 #undef VCMP
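/* Illustrative CR6 encoding for the dot-form compares above: if every element
 * compared true, crf[6] = 1 << 3 = 8; if no element compared true,
 * crf[6] = 1 << 1 = 2; a mix of results leaves both bits clear, so
 * crf[6] = 0.
 */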
740
741 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
742 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
743 ppc_avr_t *a, ppc_avr_t *b) \
744 { \
745 etype ones = (etype)-1; \
746 etype all = ones; \
747 etype result, none = 0; \
748 int i; \
749 \
750 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
751 if (cmpzero) { \
752 result = ((a->element[i] == 0) \
753 || (b->element[i] == 0) \
754 || (a->element[i] != b->element[i]) ? \
755 ones : 0x0); \
756 } else { \
757 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
758 } \
759 r->element[i] = result; \
760 all &= result; \
761 none |= result; \
762 } \
763 if (record) { \
764 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
765 } \
766 }
767
768 /* VCMPNEZ - Vector compare not equal to zero
769 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
770 * element - element type to access from vector
771 */
772 #define VCMPNE(suffix, element, etype, cmpzero) \
773 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
774 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
775 VCMPNE(zb, u8, uint8_t, 1)
776 VCMPNE(zh, u16, uint16_t, 1)
777 VCMPNE(zw, u32, uint32_t, 1)
778 VCMPNE(b, u8, uint8_t, 0)
779 VCMPNE(h, u16, uint16_t, 0)
780 VCMPNE(w, u32, uint32_t, 0)
781 #undef VCMPNE_DO
782 #undef VCMPNE
783
784 #define VCMPFP_DO(suffix, compare, order, record) \
785 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
786 ppc_avr_t *a, ppc_avr_t *b) \
787 { \
788 uint32_t ones = (uint32_t)-1; \
789 uint32_t all = ones; \
790 uint32_t none = 0; \
791 int i; \
792 \
793 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
794 uint32_t result; \
795 int rel = float32_compare_quiet(a->f[i], b->f[i], \
796 &env->vec_status); \
797 if (rel == float_relation_unordered) { \
798 result = 0; \
799 } else if (rel compare order) { \
800 result = ones; \
801 } else { \
802 result = 0; \
803 } \
804 r->u32[i] = result; \
805 all &= result; \
806 none |= result; \
807 } \
808 if (record) { \
809 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
810 } \
811 }
812 #define VCMPFP(suffix, compare, order) \
813 VCMPFP_DO(suffix, compare, order, 0) \
814 VCMPFP_DO(suffix##_dot, compare, order, 1)
815 VCMPFP(eqfp, ==, float_relation_equal)
816 VCMPFP(gefp, !=, float_relation_less)
817 VCMPFP(gtfp, ==, float_relation_greater)
818 #undef VCMPFP_DO
819 #undef VCMPFP
820
821 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
822 ppc_avr_t *a, ppc_avr_t *b, int record)
823 {
824 int i;
825 int all_in = 0;
826
827 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
828 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
829 if (le_rel == float_relation_unordered) {
830 r->u32[i] = 0xc0000000;
831 all_in = 1;
832 } else {
833 float32 bneg = float32_chs(b->f[i]);
834 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
835 int le = le_rel != float_relation_greater;
836 int ge = ge_rel != float_relation_less;
837
838 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
839 all_in |= (!le | !ge);
840 }
841 }
842 if (record) {
843 env->crf[6] = (all_in == 0) << 1;
844 }
845 }
846
847 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
848 {
849 vcmpbfp_internal(env, r, a, b, 0);
850 }
851
852 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
853 ppc_avr_t *b)
854 {
855 vcmpbfp_internal(env, r, a, b, 1);
856 }
857
858 #define VCT(suffix, satcvt, element) \
859 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
860 ppc_avr_t *b, uint32_t uim) \
861 { \
862 int i; \
863 int sat = 0; \
864 float_status s = env->vec_status; \
865 \
866 set_float_rounding_mode(float_round_to_zero, &s); \
867 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
868 if (float32_is_any_nan(b->f[i])) { \
869 r->element[i] = 0; \
870 } else { \
871 float64 t = float32_to_float64(b->f[i], &s); \
872 int64_t j; \
873 \
874 t = float64_scalbn(t, uim, &s); \
875 j = float64_to_int64(t, &s); \
876 r->element[i] = satcvt(j, &sat); \
877 } \
878 } \
879 if (sat) { \
880 env->vscr |= (1 << VSCR_SAT); \
881 } \
882 }
883 VCT(uxs, cvtsduw, u32)
884 VCT(sxs, cvtsdsw, s32)
885 #undef VCT
886
887 target_ulong helper_vclzlsbb(ppc_avr_t *r)
888 {
889 target_ulong count = 0;
890 int i;
891 VECTOR_FOR_INORDER_I(i, u8) {
892 if (r->u8[i] & 0x01) {
893 break;
894 }
895 count++;
896 }
897 return count;
898 }
899
900 target_ulong helper_vctzlsbb(ppc_avr_t *r)
901 {
902 target_ulong count = 0;
903 int i;
904 #if defined(HOST_WORDS_BIGENDIAN)
905 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
906 #else
907 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
908 #endif
909 if (r->u8[i] & 0x01) {
910 break;
911 }
912 count++;
913 }
914 return count;
915 }
916
917 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
918 ppc_avr_t *b, ppc_avr_t *c)
919 {
920 int sat = 0;
921 int i;
922
923 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
924 int32_t prod = a->s16[i] * b->s16[i];
925 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
926
927 r->s16[i] = cvtswsh(t, &sat);
928 }
929
930 if (sat) {
931 env->vscr |= (1 << VSCR_SAT);
932 }
933 }
934
935 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
936 ppc_avr_t *b, ppc_avr_t *c)
937 {
938 int sat = 0;
939 int i;
940
941 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
942 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
943 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
944 r->s16[i] = cvtswsh(t, &sat);
945 }
946
947 if (sat) {
948 env->vscr |= (1 << VSCR_SAT);
949 }
950 }
951
952 #define VMINMAX_DO(name, compare, element) \
953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
954 { \
955 int i; \
956 \
957 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
958 if (a->element[i] compare b->element[i]) { \
959 r->element[i] = b->element[i]; \
960 } else { \
961 r->element[i] = a->element[i]; \
962 } \
963 } \
964 }
965 #define VMINMAX(suffix, element) \
966 VMINMAX_DO(min##suffix, >, element) \
967 VMINMAX_DO(max##suffix, <, element)
968 VMINMAX(sb, s8)
969 VMINMAX(sh, s16)
970 VMINMAX(sw, s32)
971 VMINMAX(sd, s64)
972 VMINMAX(ub, u8)
973 VMINMAX(uh, u16)
974 VMINMAX(uw, u32)
975 VMINMAX(ud, u64)
976 #undef VMINMAX_DO
977 #undef VMINMAX
978
979 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
980 {
981 int i;
982
983 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
984 int32_t prod = a->s16[i] * b->s16[i];
985 r->s16[i] = (int16_t) (prod + c->s16[i]);
986 }
987 }
988
989 #define VMRG_DO(name, element, highp) \
990 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
991 { \
992 ppc_avr_t result; \
993 int i; \
994 size_t n_elems = ARRAY_SIZE(r->element); \
995 \
996 for (i = 0; i < n_elems / 2; i++) { \
997 if (highp) { \
998 result.element[i*2+HI_IDX] = a->element[i]; \
999 result.element[i*2+LO_IDX] = b->element[i]; \
1000 } else { \
1001 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
1002 b->element[n_elems - i - 1]; \
1003 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
1004 a->element[n_elems - i - 1]; \
1005 } \
1006 } \
1007 *r = result; \
1008 }
1009 #if defined(HOST_WORDS_BIGENDIAN)
1010 #define MRGHI 0
1011 #define MRGLO 1
1012 #else
1013 #define MRGHI 1
1014 #define MRGLO 0
1015 #endif
1016 #define VMRG(suffix, element) \
1017 VMRG_DO(mrgl##suffix, element, MRGHI) \
1018 VMRG_DO(mrgh##suffix, element, MRGLO)
1019 VMRG(b, u8)
1020 VMRG(h, u16)
1021 VMRG(w, u32)
1022 #undef VMRG_DO
1023 #undef VMRG
1024 #undef MRGHI
1025 #undef MRGLO
1026
1027 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1028 ppc_avr_t *b, ppc_avr_t *c)
1029 {
1030 int32_t prod[16];
1031 int i;
1032
1033 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1034 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1035 }
1036
1037 VECTOR_FOR_INORDER_I(i, s32) {
1038 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1039 prod[4 * i + 2] + prod[4 * i + 3];
1040 }
1041 }
1042
1043 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1044 ppc_avr_t *b, ppc_avr_t *c)
1045 {
1046 int32_t prod[8];
1047 int i;
1048
1049 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1050 prod[i] = a->s16[i] * b->s16[i];
1051 }
1052
1053 VECTOR_FOR_INORDER_I(i, s32) {
1054 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1055 }
1056 }
1057
1058 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1059 ppc_avr_t *b, ppc_avr_t *c)
1060 {
1061 int32_t prod[8];
1062 int i;
1063 int sat = 0;
1064
1065 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1066 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1067 }
1068
1069 VECTOR_FOR_INORDER_I(i, s32) {
1070 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1071
1072 r->u32[i] = cvtsdsw(t, &sat);
1073 }
1074
1075 if (sat) {
1076 env->vscr |= (1 << VSCR_SAT);
1077 }
1078 }
1079
1080 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1081 ppc_avr_t *b, ppc_avr_t *c)
1082 {
1083 uint16_t prod[16];
1084 int i;
1085
1086 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1087 prod[i] = a->u8[i] * b->u8[i];
1088 }
1089
1090 VECTOR_FOR_INORDER_I(i, u32) {
1091 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1092 prod[4 * i + 2] + prod[4 * i + 3];
1093 }
1094 }
1095
1096 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1097 ppc_avr_t *b, ppc_avr_t *c)
1098 {
1099 uint32_t prod[8];
1100 int i;
1101
1102 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1103 prod[i] = a->u16[i] * b->u16[i];
1104 }
1105
1106 VECTOR_FOR_INORDER_I(i, u32) {
1107 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1108 }
1109 }
1110
1111 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1112 ppc_avr_t *b, ppc_avr_t *c)
1113 {
1114 uint32_t prod[8];
1115 int i;
1116 int sat = 0;
1117
1118 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1119 prod[i] = a->u16[i] * b->u16[i];
1120 }
1121
1122 VECTOR_FOR_INORDER_I(i, s32) {
1123 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1124
1125 r->u32[i] = cvtuduw(t, &sat);
1126 }
1127
1128 if (sat) {
1129 env->vscr |= (1 << VSCR_SAT);
1130 }
1131 }
1132
1133 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1134 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1135 { \
1136 int i; \
1137 \
1138 VECTOR_FOR_INORDER_I(i, prod_element) { \
1139 if (evenp) { \
1140 r->prod_element[i] = \
1141 (cast)a->mul_element[i * 2 + HI_IDX] * \
1142 (cast)b->mul_element[i * 2 + HI_IDX]; \
1143 } else { \
1144 r->prod_element[i] = \
1145 (cast)a->mul_element[i * 2 + LO_IDX] * \
1146 (cast)b->mul_element[i * 2 + LO_IDX]; \
1147 } \
1148 } \
1149 }
1150 #define VMUL(suffix, mul_element, prod_element, cast) \
1151 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1152 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1153 VMUL(sb, s8, s16, int16_t)
1154 VMUL(sh, s16, s32, int32_t)
1155 VMUL(sw, s32, s64, int64_t)
1156 VMUL(ub, u8, u16, uint16_t)
1157 VMUL(uh, u16, u32, uint32_t)
1158 VMUL(uw, u32, u64, uint64_t)
1159 #undef VMUL_DO
1160 #undef VMUL
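/* Illustrative example for the even/odd multiplies above: vmulesh multiplies
 * the even-numbered halfword elements into full 32-bit products, so
 * 0x7fff * 0x7fff = 0x3fff0001 is kept without truncation; vmulosh does the
 * same for the odd-numbered elements.
 */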
1161
1162 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1163 ppc_avr_t *c)
1164 {
1165 ppc_avr_t result;
1166 int i;
1167
1168 VECTOR_FOR_INORDER_I(i, u8) {
1169 int s = c->u8[i] & 0x1f;
1170 #if defined(HOST_WORDS_BIGENDIAN)
1171 int index = s & 0xf;
1172 #else
1173 int index = 15 - (s & 0xf);
1174 #endif
1175
1176 if (s & 0x10) {
1177 result.u8[i] = b->u8[index];
1178 } else {
1179 result.u8[i] = a->u8[index];
1180 }
1181 }
1182 *r = result;
1183 }
1184
1185 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1186 ppc_avr_t *c)
1187 {
1188 ppc_avr_t result;
1189 int i;
1190
1191 VECTOR_FOR_INORDER_I(i, u8) {
1192 int s = c->u8[i] & 0x1f;
1193 #if defined(HOST_WORDS_BIGENDIAN)
1194 int index = 15 - (s & 0xf);
1195 #else
1196 int index = s & 0xf;
1197 #endif
1198
1199 if (s & 0x10) {
1200 result.u8[i] = a->u8[index];
1201 } else {
1202 result.u8[i] = b->u8[index];
1203 }
1204 }
1205 *r = result;
1206 }
1207
1208 #if defined(HOST_WORDS_BIGENDIAN)
1209 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1210 #define VBPERMD_INDEX(i) (i)
1211 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1212 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1213 #else
1214 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1215 #define VBPERMD_INDEX(i) (1 - i)
1216 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1217 #define EXTRACT_BIT(avr, i, index) \
1218 (extract64((avr)->u64[1 - i], 63 - index, 1))
1219 #endif
1220
1221 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1222 {
1223 int i, j;
1224 ppc_avr_t result = { .u64 = { 0, 0 } };
1225 VECTOR_FOR_INORDER_I(i, u64) {
1226 for (j = 0; j < 8; j++) {
1227 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1228 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1229 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1230 }
1231 }
1232 }
1233 *r = result;
1234 }
1235
1236 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1237 {
1238 int i;
1239 uint64_t perm = 0;
1240
1241 VECTOR_FOR_INORDER_I(i, u8) {
1242 int index = VBPERMQ_INDEX(b, i);
1243
1244 if (index < 128) {
1245 uint64_t mask = (1ull << (63-(index & 0x3F)));
1246 if (a->u64[VBPERMQ_DW(index)] & mask) {
1247 perm |= (0x8000 >> i);
1248 }
1249 }
1250 }
1251
1252 r->u64[HI_IDX] = perm;
1253 r->u64[LO_IDX] = 0;
1254 }
1255
1256 #undef VBPERMQ_INDEX
1257 #undef VBPERMQ_DW
1258
1259 static const uint64_t VGBBD_MASKS[256] = {
1260 0x0000000000000000ull, /* 00 */
1261 0x0000000000000080ull, /* 01 */
1262 0x0000000000008000ull, /* 02 */
1263 0x0000000000008080ull, /* 03 */
1264 0x0000000000800000ull, /* 04 */
1265 0x0000000000800080ull, /* 05 */
1266 0x0000000000808000ull, /* 06 */
1267 0x0000000000808080ull, /* 07 */
1268 0x0000000080000000ull, /* 08 */
1269 0x0000000080000080ull, /* 09 */
1270 0x0000000080008000ull, /* 0A */
1271 0x0000000080008080ull, /* 0B */
1272 0x0000000080800000ull, /* 0C */
1273 0x0000000080800080ull, /* 0D */
1274 0x0000000080808000ull, /* 0E */
1275 0x0000000080808080ull, /* 0F */
1276 0x0000008000000000ull, /* 10 */
1277 0x0000008000000080ull, /* 11 */
1278 0x0000008000008000ull, /* 12 */
1279 0x0000008000008080ull, /* 13 */
1280 0x0000008000800000ull, /* 14 */
1281 0x0000008000800080ull, /* 15 */
1282 0x0000008000808000ull, /* 16 */
1283 0x0000008000808080ull, /* 17 */
1284 0x0000008080000000ull, /* 18 */
1285 0x0000008080000080ull, /* 19 */
1286 0x0000008080008000ull, /* 1A */
1287 0x0000008080008080ull, /* 1B */
1288 0x0000008080800000ull, /* 1C */
1289 0x0000008080800080ull, /* 1D */
1290 0x0000008080808000ull, /* 1E */
1291 0x0000008080808080ull, /* 1F */
1292 0x0000800000000000ull, /* 20 */
1293 0x0000800000000080ull, /* 21 */
1294 0x0000800000008000ull, /* 22 */
1295 0x0000800000008080ull, /* 23 */
1296 0x0000800000800000ull, /* 24 */
1297 0x0000800000800080ull, /* 25 */
1298 0x0000800000808000ull, /* 26 */
1299 0x0000800000808080ull, /* 27 */
1300 0x0000800080000000ull, /* 28 */
1301 0x0000800080000080ull, /* 29 */
1302 0x0000800080008000ull, /* 2A */
1303 0x0000800080008080ull, /* 2B */
1304 0x0000800080800000ull, /* 2C */
1305 0x0000800080800080ull, /* 2D */
1306 0x0000800080808000ull, /* 2E */
1307 0x0000800080808080ull, /* 2F */
1308 0x0000808000000000ull, /* 30 */
1309 0x0000808000000080ull, /* 31 */
1310 0x0000808000008000ull, /* 32 */
1311 0x0000808000008080ull, /* 33 */
1312 0x0000808000800000ull, /* 34 */
1313 0x0000808000800080ull, /* 35 */
1314 0x0000808000808000ull, /* 36 */
1315 0x0000808000808080ull, /* 37 */
1316 0x0000808080000000ull, /* 38 */
1317 0x0000808080000080ull, /* 39 */
1318 0x0000808080008000ull, /* 3A */
1319 0x0000808080008080ull, /* 3B */
1320 0x0000808080800000ull, /* 3C */
1321 0x0000808080800080ull, /* 3D */
1322 0x0000808080808000ull, /* 3E */
1323 0x0000808080808080ull, /* 3F */
1324 0x0080000000000000ull, /* 40 */
1325 0x0080000000000080ull, /* 41 */
1326 0x0080000000008000ull, /* 42 */
1327 0x0080000000008080ull, /* 43 */
1328 0x0080000000800000ull, /* 44 */
1329 0x0080000000800080ull, /* 45 */
1330 0x0080000000808000ull, /* 46 */
1331 0x0080000000808080ull, /* 47 */
1332 0x0080000080000000ull, /* 48 */
1333 0x0080000080000080ull, /* 49 */
1334 0x0080000080008000ull, /* 4A */
1335 0x0080000080008080ull, /* 4B */
1336 0x0080000080800000ull, /* 4C */
1337 0x0080000080800080ull, /* 4D */
1338 0x0080000080808000ull, /* 4E */
1339 0x0080000080808080ull, /* 4F */
1340 0x0080008000000000ull, /* 50 */
1341 0x0080008000000080ull, /* 51 */
1342 0x0080008000008000ull, /* 52 */
1343 0x0080008000008080ull, /* 53 */
1344 0x0080008000800000ull, /* 54 */
1345 0x0080008000800080ull, /* 55 */
1346 0x0080008000808000ull, /* 56 */
1347 0x0080008000808080ull, /* 57 */
1348 0x0080008080000000ull, /* 58 */
1349 0x0080008080000080ull, /* 59 */
1350 0x0080008080008000ull, /* 5A */
1351 0x0080008080008080ull, /* 5B */
1352 0x0080008080800000ull, /* 5C */
1353 0x0080008080800080ull, /* 5D */
1354 0x0080008080808000ull, /* 5E */
1355 0x0080008080808080ull, /* 5F */
1356 0x0080800000000000ull, /* 60 */
1357 0x0080800000000080ull, /* 61 */
1358 0x0080800000008000ull, /* 62 */
1359 0x0080800000008080ull, /* 63 */
1360 0x0080800000800000ull, /* 64 */
1361 0x0080800000800080ull, /* 65 */
1362 0x0080800000808000ull, /* 66 */
1363 0x0080800000808080ull, /* 67 */
1364 0x0080800080000000ull, /* 68 */
1365 0x0080800080000080ull, /* 69 */
1366 0x0080800080008000ull, /* 6A */
1367 0x0080800080008080ull, /* 6B */
1368 0x0080800080800000ull, /* 6C */
1369 0x0080800080800080ull, /* 6D */
1370 0x0080800080808000ull, /* 6E */
1371 0x0080800080808080ull, /* 6F */
1372 0x0080808000000000ull, /* 70 */
1373 0x0080808000000080ull, /* 71 */
1374 0x0080808000008000ull, /* 72 */
1375 0x0080808000008080ull, /* 73 */
1376 0x0080808000800000ull, /* 74 */
1377 0x0080808000800080ull, /* 75 */
1378 0x0080808000808000ull, /* 76 */
1379 0x0080808000808080ull, /* 77 */
1380 0x0080808080000000ull, /* 78 */
1381 0x0080808080000080ull, /* 79 */
1382 0x0080808080008000ull, /* 7A */
1383 0x0080808080008080ull, /* 7B */
1384 0x0080808080800000ull, /* 7C */
1385 0x0080808080800080ull, /* 7D */
1386 0x0080808080808000ull, /* 7E */
1387 0x0080808080808080ull, /* 7F */
1388 0x8000000000000000ull, /* 80 */
1389 0x8000000000000080ull, /* 81 */
1390 0x8000000000008000ull, /* 82 */
1391 0x8000000000008080ull, /* 83 */
1392 0x8000000000800000ull, /* 84 */
1393 0x8000000000800080ull, /* 85 */
1394 0x8000000000808000ull, /* 86 */
1395 0x8000000000808080ull, /* 87 */
1396 0x8000000080000000ull, /* 88 */
1397 0x8000000080000080ull, /* 89 */
1398 0x8000000080008000ull, /* 8A */
1399 0x8000000080008080ull, /* 8B */
1400 0x8000000080800000ull, /* 8C */
1401 0x8000000080800080ull, /* 8D */
1402 0x8000000080808000ull, /* 8E */
1403 0x8000000080808080ull, /* 8F */
1404 0x8000008000000000ull, /* 90 */
1405 0x8000008000000080ull, /* 91 */
1406 0x8000008000008000ull, /* 92 */
1407 0x8000008000008080ull, /* 93 */
1408 0x8000008000800000ull, /* 94 */
1409 0x8000008000800080ull, /* 95 */
1410 0x8000008000808000ull, /* 96 */
1411 0x8000008000808080ull, /* 97 */
1412 0x8000008080000000ull, /* 98 */
1413 0x8000008080000080ull, /* 99 */
1414 0x8000008080008000ull, /* 9A */
1415 0x8000008080008080ull, /* 9B */
1416 0x8000008080800000ull, /* 9C */
1417 0x8000008080800080ull, /* 9D */
1418 0x8000008080808000ull, /* 9E */
1419 0x8000008080808080ull, /* 9F */
1420 0x8000800000000000ull, /* A0 */
1421 0x8000800000000080ull, /* A1 */
1422 0x8000800000008000ull, /* A2 */
1423 0x8000800000008080ull, /* A3 */
1424 0x8000800000800000ull, /* A4 */
1425 0x8000800000800080ull, /* A5 */
1426 0x8000800000808000ull, /* A6 */
1427 0x8000800000808080ull, /* A7 */
1428 0x8000800080000000ull, /* A8 */
1429 0x8000800080000080ull, /* A9 */
1430 0x8000800080008000ull, /* AA */
1431 0x8000800080008080ull, /* AB */
1432 0x8000800080800000ull, /* AC */
1433 0x8000800080800080ull, /* AD */
1434 0x8000800080808000ull, /* AE */
1435 0x8000800080808080ull, /* AF */
1436 0x8000808000000000ull, /* B0 */
1437 0x8000808000000080ull, /* B1 */
1438 0x8000808000008000ull, /* B2 */
1439 0x8000808000008080ull, /* B3 */
1440 0x8000808000800000ull, /* B4 */
1441 0x8000808000800080ull, /* B5 */
1442 0x8000808000808000ull, /* B6 */
1443 0x8000808000808080ull, /* B7 */
1444 0x8000808080000000ull, /* B8 */
1445 0x8000808080000080ull, /* B9 */
1446 0x8000808080008000ull, /* BA */
1447 0x8000808080008080ull, /* BB */
1448 0x8000808080800000ull, /* BC */
1449 0x8000808080800080ull, /* BD */
1450 0x8000808080808000ull, /* BE */
1451 0x8000808080808080ull, /* BF */
1452 0x8080000000000000ull, /* C0 */
1453 0x8080000000000080ull, /* C1 */
1454 0x8080000000008000ull, /* C2 */
1455 0x8080000000008080ull, /* C3 */
1456 0x8080000000800000ull, /* C4 */
1457 0x8080000000800080ull, /* C5 */
1458 0x8080000000808000ull, /* C6 */
1459 0x8080000000808080ull, /* C7 */
1460 0x8080000080000000ull, /* C8 */
1461 0x8080000080000080ull, /* C9 */
1462 0x8080000080008000ull, /* CA */
1463 0x8080000080008080ull, /* CB */
1464 0x8080000080800000ull, /* CC */
1465 0x8080000080800080ull, /* CD */
1466 0x8080000080808000ull, /* CE */
1467 0x8080000080808080ull, /* CF */
1468 0x8080008000000000ull, /* D0 */
1469 0x8080008000000080ull, /* D1 */
1470 0x8080008000008000ull, /* D2 */
1471 0x8080008000008080ull, /* D3 */
1472 0x8080008000800000ull, /* D4 */
1473 0x8080008000800080ull, /* D5 */
1474 0x8080008000808000ull, /* D6 */
1475 0x8080008000808080ull, /* D7 */
1476 0x8080008080000000ull, /* D8 */
1477 0x8080008080000080ull, /* D9 */
1478 0x8080008080008000ull, /* DA */
1479 0x8080008080008080ull, /* DB */
1480 0x8080008080800000ull, /* DC */
1481 0x8080008080800080ull, /* DD */
1482 0x8080008080808000ull, /* DE */
1483 0x8080008080808080ull, /* DF */
1484 0x8080800000000000ull, /* E0 */
1485 0x8080800000000080ull, /* E1 */
1486 0x8080800000008000ull, /* E2 */
1487 0x8080800000008080ull, /* E3 */
1488 0x8080800000800000ull, /* E4 */
1489 0x8080800000800080ull, /* E5 */
1490 0x8080800000808000ull, /* E6 */
1491 0x8080800000808080ull, /* E7 */
1492 0x8080800080000000ull, /* E8 */
1493 0x8080800080000080ull, /* E9 */
1494 0x8080800080008000ull, /* EA */
1495 0x8080800080008080ull, /* EB */
1496 0x8080800080800000ull, /* EC */
1497 0x8080800080800080ull, /* ED */
1498 0x8080800080808000ull, /* EE */
1499 0x8080800080808080ull, /* EF */
1500 0x8080808000000000ull, /* F0 */
1501 0x8080808000000080ull, /* F1 */
1502 0x8080808000008000ull, /* F2 */
1503 0x8080808000008080ull, /* F3 */
1504 0x8080808000800000ull, /* F4 */
1505 0x8080808000800080ull, /* F5 */
1506 0x8080808000808000ull, /* F6 */
1507 0x8080808000808080ull, /* F7 */
1508 0x8080808080000000ull, /* F8 */
1509 0x8080808080000080ull, /* F9 */
1510 0x8080808080008000ull, /* FA */
1511 0x8080808080008080ull, /* FB */
1512 0x8080808080800000ull, /* FC */
1513 0x8080808080800080ull, /* FD */
1514 0x8080808080808000ull, /* FE */
1515 0x8080808080808080ull, /* FF */
1516 };
1517
1518 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1519 {
1520 int i;
1521 uint64_t t[2] = { 0, 0 };
1522
1523 VECTOR_FOR_INORDER_I(i, u8) {
1524 #if defined(HOST_WORDS_BIGENDIAN)
1525 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1526 #else
1527 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1528 #endif
1529 }
1530
1531 r->u64[0] = t[0];
1532 r->u64[1] = t[1];
1533 }
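/* The table above drives vgbbd (gather bits by bytes): each mask places bit n
 * of an input byte into the most significant bit of output byte n, e.g.
 * VGBBD_MASKS[0x01] = 0x0000000000000080 and
 * VGBBD_MASKS[0x80] = 0x8000000000000000; ORing the masks of the eight source
 * bytes, each shifted by its byte position, performs an 8x8 bit-matrix
 * transpose within each doubleword.
 */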
1534
1535 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1536 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1537 { \
1538 int i, j; \
1539 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1540 \
1541 VECTOR_FOR_INORDER_I(i, srcfld) { \
1542 prod[i] = 0; \
1543 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1544 if (a->srcfld[i] & (1ull<<j)) { \
1545 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1546 } \
1547 } \
1548 } \
1549 \
1550 VECTOR_FOR_INORDER_I(i, trgfld) { \
1551 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1552 } \
1553 }
1554
1555 PMSUM(vpmsumb, u8, u16, uint16_t)
1556 PMSUM(vpmsumh, u16, u32, uint32_t)
1557 PMSUM(vpmsumw, u32, u64, uint64_t)
1558
1559 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1560 {
1561
1562 #ifdef CONFIG_INT128
1563 int i, j;
1564 __uint128_t prod[2];
1565
1566 VECTOR_FOR_INORDER_I(i, u64) {
1567 prod[i] = 0;
1568 for (j = 0; j < 64; j++) {
1569 if (a->u64[i] & (1ull<<j)) {
1570 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1571 }
1572 }
1573 }
1574
1575 r->u128 = prod[0] ^ prod[1];
1576
1577 #else
1578 int i, j;
1579 ppc_avr_t prod[2];
1580
1581 VECTOR_FOR_INORDER_I(i, u64) {
1582 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1583 for (j = 0; j < 64; j++) {
1584 if (a->u64[i] & (1ull<<j)) {
1585 ppc_avr_t bshift;
1586 if (j == 0) {
1587 bshift.u64[HI_IDX] = 0;
1588 bshift.u64[LO_IDX] = b->u64[i];
1589 } else {
1590 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1591 bshift.u64[LO_IDX] = b->u64[i] << j;
1592 }
1593 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1594 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1595 }
1596 }
1597 }
1598
1599 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1600 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1601 #endif
1602 }
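/* Illustrative example for the polynomial (carry-less) multiplies above:
 * partial products are accumulated with XOR instead of addition, so a
 * doubleword pair of 0x3 and 0x3 in vpmsumd contributes 0x5 rather than 0x9,
 * since (x + 1)^2 = x^2 + 1 over GF(2).
 */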
1603
1604
1605 #if defined(HOST_WORDS_BIGENDIAN)
1606 #define PKBIG 1
1607 #else
1608 #define PKBIG 0
1609 #endif
1610 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1611 {
1612 int i, j;
1613 ppc_avr_t result;
1614 #if defined(HOST_WORDS_BIGENDIAN)
1615 const ppc_avr_t *x[2] = { a, b };
1616 #else
1617 const ppc_avr_t *x[2] = { b, a };
1618 #endif
1619
1620 VECTOR_FOR_INORDER_I(i, u64) {
1621 VECTOR_FOR_INORDER_I(j, u32) {
1622 uint32_t e = x[i]->u32[j];
1623
1624 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1625 ((e >> 6) & 0x3e0) |
1626 ((e >> 3) & 0x1f));
1627 }
1628 }
1629 *r = result;
1630 }
1631
1632 #define VPK(suffix, from, to, cvt, dosat) \
1633 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1634 ppc_avr_t *a, ppc_avr_t *b) \
1635 { \
1636 int i; \
1637 int sat = 0; \
1638 ppc_avr_t result; \
1639 ppc_avr_t *a0 = PKBIG ? a : b; \
1640 ppc_avr_t *a1 = PKBIG ? b : a; \
1641 \
1642 VECTOR_FOR_INORDER_I(i, from) { \
1643 result.to[i] = cvt(a0->from[i], &sat); \
1644 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1645 } \
1646 *r = result; \
1647 if (dosat && sat) { \
1648 env->vscr |= (1 << VSCR_SAT); \
1649 } \
1650 }
1651 #define I(x, y) (x)
1652 VPK(shss, s16, s8, cvtshsb, 1)
1653 VPK(shus, s16, u8, cvtshub, 1)
1654 VPK(swss, s32, s16, cvtswsh, 1)
1655 VPK(swus, s32, u16, cvtswuh, 1)
1656 VPK(sdss, s64, s32, cvtsdsw, 1)
1657 VPK(sdus, s64, u32, cvtsduw, 1)
1658 VPK(uhus, u16, u8, cvtuhub, 1)
1659 VPK(uwus, u32, u16, cvtuwuh, 1)
1660 VPK(udus, u64, u32, cvtuduw, 1)
1661 VPK(uhum, u16, u8, I, 0)
1662 VPK(uwum, u32, u16, I, 0)
1663 VPK(udum, u64, u32, I, 0)
1664 #undef I
1665 #undef VPK
1666 #undef PKBIG
1667
1668 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1669 {
1670 int i;
1671
1672 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1673 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1674 }
1675 }
1676
1677 #define VRFI(suffix, rounding) \
1678 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1679 ppc_avr_t *b) \
1680 { \
1681 int i; \
1682 float_status s = env->vec_status; \
1683 \
1684 set_float_rounding_mode(rounding, &s); \
1685 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1686 r->f[i] = float32_round_to_int (b->f[i], &s); \
1687 } \
1688 }
1689 VRFI(n, float_round_nearest_even)
1690 VRFI(m, float_round_down)
1691 VRFI(p, float_round_up)
1692 VRFI(z, float_round_to_zero)
1693 #undef VRFI
1694
1695 #define VROTATE(suffix, element, mask) \
1696 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1697 { \
1698 int i; \
1699 \
1700 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1701 unsigned int shift = b->element[i] & mask; \
1702 r->element[i] = (a->element[i] << shift) | \
1703 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1704 } \
1705 }
1706 VROTATE(b, u8, 0x7)
1707 VROTATE(h, u16, 0xF)
1708 VROTATE(w, u32, 0x1F)
1709 VROTATE(d, u64, 0x3F)
1710 #undef VROTATE
1711
1712 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1713 {
1714 int i;
1715
1716 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1717 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1718
1719 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1720 }
1721 }
1722
1723 #define VRLMI(name, size, element, insert) \
1724 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1725 { \
1726 int i; \
1727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1728 uint##size##_t src1 = a->element[i]; \
1729 uint##size##_t src2 = b->element[i]; \
1730 uint##size##_t src3 = r->element[i]; \
1731 uint##size##_t begin, end, shift, mask, rot_val; \
1732 \
1733 shift = extract##size(src2, 0, 6); \
1734 end = extract##size(src2, 8, 6); \
1735 begin = extract##size(src2, 16, 6); \
1736 rot_val = rol##size(src1, shift); \
1737 mask = mask_u##size(begin, end); \
1738 if (insert) { \
1739 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1740 } else { \
1741 r->element[i] = (rot_val & mask); \
1742 } \
1743 } \
1744 }
1745
1746 VRLMI(vrldmi, 64, u64, 1);
1747 VRLMI(vrlwmi, 32, u32, 1);
1748 VRLMI(vrldnm, 64, u64, 0);
1749 VRLMI(vrlwnm, 32, u32, 0);
1750
1751 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1752 ppc_avr_t *c)
1753 {
1754 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1755 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1756 }
1757
1758 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1759 {
1760 int i;
1761
1762 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1763 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1764 }
1765 }
1766
1767 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1768 {
1769 int i;
1770
1771 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1772 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1773 }
1774 }
1775
1776 /* The specification says that the results are undefined unless all of the
1777  * shift counts are identical.  We check that they are, to conform to what
1778  * real hardware appears to do. */
1779 #define VSHIFT(suffix, leftp) \
1780 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1781 { \
1782 int shift = b->u8[LO_IDX*15] & 0x7; \
1783 int doit = 1; \
1784 int i; \
1785 \
1786 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1787 doit = doit && ((b->u8[i] & 0x7) == shift); \
1788 } \
1789 if (doit) { \
1790 if (shift == 0) { \
1791 *r = *a; \
1792 } else if (leftp) { \
1793 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1794 \
1795 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1796 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1797 } else { \
1798 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1799 \
1800 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1801 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1802 } \
1803 } \
1804 }
1805 VSHIFT(l, 1)
1806 VSHIFT(r, 0)
1807 #undef VSHIFT
1808
1809 #define VSL(suffix, element, mask) \
1810 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1811 { \
1812 int i; \
1813 \
1814 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1815 unsigned int shift = b->element[i] & mask; \
1816 \
1817 r->element[i] = a->element[i] << shift; \
1818 } \
1819 }
1820 VSL(b, u8, 0x7)
1821 VSL(h, u16, 0x0F)
1822 VSL(w, u32, 0x1F)
1823 VSL(d, u64, 0x3F)
1824 #undef VSL
1825
1826 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1827 {
1828 int i;
1829 unsigned int shift, bytes, size;
1830
1831 size = ARRAY_SIZE(r->u8);
1832 for (i = 0; i < size; i++) {
1833 shift = b->u8[i] & 0x7; /* extract shift value */
1834 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1835 (((i + 1) < size) ? a->u8[i + 1] : 0);
1836 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1837 }
1838 }
1839
1840 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1841 {
1842 int i;
1843 unsigned int shift, bytes;
1844
1845 /* Use reverse order, as the destination and source registers can be the
1846  * same.  The register is modified in place (saving a temporary), and the
1847  * reverse order guarantees that an already-computed result is not fed back
1848  * into a later byte.
1849  */
1849 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1850 shift = b->u8[i] & 0x7; /* extract shift value */
1851 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1852 /* extract adjacent bytes */
1853 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1854 }
1855 }
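/* Illustrative example for the variable byte shifts above: in vslv, a byte is
 * shifted together with its neighbouring byte a->u8[i + 1], so with
 * a->u8[i] = 0x01, a->u8[i + 1] = 0x80 and a shift count of 1 the result byte
 * is ((0x0180 << 1) >> 8) & 0xff = 0x03; vsrv walks the bytes in reverse so
 * that the in-place update never reuses an already-shifted byte.
 */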
1856
1857 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1858 {
1859 int sh = shift & 0xf;
1860 int i;
1861 ppc_avr_t result;
1862
1863 #if defined(HOST_WORDS_BIGENDIAN)
1864 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1865 int index = sh + i;
1866 if (index > 0xf) {
1867 result.u8[i] = b->u8[index - 0x10];
1868 } else {
1869 result.u8[i] = a->u8[index];
1870 }
1871 }
1872 #else
1873 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1874 int index = (16 - sh) + i;
1875 if (index > 0xf) {
1876 result.u8[i] = a->u8[index - 0x10];
1877 } else {
1878 result.u8[i] = b->u8[index];
1879 }
1880 }
1881 #endif
1882 *r = result;
1883 }
1884
1885 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1886 {
1887 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1888
1889 #if defined(HOST_WORDS_BIGENDIAN)
1890 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1891 memset(&r->u8[16-sh], 0, sh);
1892 #else
1893 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1894 memset(&r->u8[0], 0, sh);
1895 #endif
1896 }
1897
1898 /* Experimental testing shows that hardware masks the immediate. */
1899 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1900 #if defined(HOST_WORDS_BIGENDIAN)
1901 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1902 #else
1903 #define SPLAT_ELEMENT(element) \
1904 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1905 #endif
1906 #define VSPLT(suffix, element) \
1907 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1908 { \
1909 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1910 int i; \
1911 \
1912 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1913 r->element[i] = s; \
1914 } \
1915 }
1916 VSPLT(b, u8)
1917 VSPLT(h, u16)
1918 VSPLT(w, u32)
1919 #undef VSPLT
1920 #undef SPLAT_ELEMENT
1921 #undef _SPLAT_MASKED
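/* Illustrative example of the masking above: for vsplth the immediate is
 * reduced modulo the element count (8 halfwords), so splat = 9 selects the
 * same element as splat = 1, matching the observed hardware behaviour.
 */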
1922 #if defined(HOST_WORDS_BIGENDIAN)
1923 #define VINSERT(suffix, element) \
1924 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1925 { \
1926 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \
1927 sizeof(r->element[0])); \
1928 }
1929 #else
1930 #define VINSERT(suffix, element) \
1931 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1932 { \
1933 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1934 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1935 }
1936 #endif
1937 VINSERT(b, u8)
1938 VINSERT(h, u16)
1939 VINSERT(w, u32)
1940 VINSERT(d, u64)
1941 #undef VINSERT
1942 #if defined(HOST_WORDS_BIGENDIAN)
1943 #define VEXTRACT(suffix, element) \
1944 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1945 { \
1946 uint32_t es = sizeof(r->element[0]); \
1947 memmove(&r->u8[8 - es], &b->u8[index], es); \
1948 memset(&r->u8[8], 0, 8); \
1949 memset(&r->u8[0], 0, 8 - es); \
1950 }
1951 #else
1952 #define VEXTRACT(suffix, element) \
1953 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1954 { \
1955 uint32_t es = sizeof(r->element[0]); \
1956 uint32_t s = (16 - index) - es; \
1957 memmove(&r->u8[8], &b->u8[s], es); \
1958 memset(&r->u8[0], 0, 8); \
1959 memset(&r->u8[8 + es], 0, 8 - es); \
1960 }
1961 #endif
1962 VEXTRACT(ub, u8)
1963 VEXTRACT(uh, u16)
1964 VEXTRACT(uw, u32)
1965 VEXTRACT(d, u64)
1966 #undef VEXTRACT
1967
1968 #define VEXT_SIGNED(name, element, mask, cast, recast) \
1969 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1970 { \
1971 int i; \
1972 VECTOR_FOR_INORDER_I(i, element) { \
1973 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
1974 } \
1975 }
1976 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
1977 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
1978 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
1979 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
1980 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
1981 #undef VEXT_SIGNED
1982
1983 #define VNEG(name, element) \
1984 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1985 { \
1986 int i; \
1987 VECTOR_FOR_INORDER_I(i, element) { \
1988 r->element[i] = -b->element[i]; \
1989 } \
1990 }
1991 VNEG(vnegw, s32)
1992 VNEG(vnegd, s64)
1993 #undef VNEG
1994
1995 #define VSPLTI(suffix, element, splat_type) \
1996 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1997 { \
1998 splat_type x = (int8_t)(splat << 3) >> 3; \
1999 int i; \
2000 \
2001 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2002 r->element[i] = x; \
2003 } \
2004 }
2005 VSPLTI(b, s8, int8_t)
2006 VSPLTI(h, s16, int16_t)
2007 VSPLTI(w, s32, int32_t)
2008 #undef VSPLTI
2009
2010 #define VSR(suffix, element, mask) \
2011 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2012 { \
2013 int i; \
2014 \
2015 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2016 unsigned int shift = b->element[i] & mask; \
2017 r->element[i] = a->element[i] >> shift; \
2018 } \
2019 }
2020 VSR(ab, s8, 0x7)
2021 VSR(ah, s16, 0xF)
2022 VSR(aw, s32, 0x1F)
2023 VSR(ad, s64, 0x3F)
2024 VSR(b, u8, 0x7)
2025 VSR(h, u16, 0xF)
2026 VSR(w, u32, 0x1F)
2027 VSR(d, u64, 0x3F)
2028 #undef VSR
2029
2030 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2031 {
2032 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2033
2034 #if defined(HOST_WORDS_BIGENDIAN)
2035 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2036 memset(&r->u8[0], 0, sh);
2037 #else
2038 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2039 memset(&r->u8[16 - sh], 0, sh);
2040 #endif
2041 }
2042
2043 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2044 {
2045 int i;
2046
2047 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2048 r->u32[i] = a->u32[i] >= b->u32[i];
2049 }
2050 }
2051
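/*
 * vsumsws (sketch): add all four signed word elements of a to the last
 * word element of b (PPC element numbering), saturate the sum to 32 bits,
 * and store it in the last word element of r; the remaining words of r
 * are cleared and VSCR[SAT] is set if saturation occurred.
 */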
2052 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2053 {
2054 int64_t t;
2055 int i, upper;
2056 ppc_avr_t result;
2057 int sat = 0;
2058
2059 #if defined(HOST_WORDS_BIGENDIAN)
2060 upper = ARRAY_SIZE(r->s32) - 1;
2061 #else
2062 upper = 0;
2063 #endif
2064 t = (int64_t)b->s32[upper];
2065 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2066 t += a->s32[i];
2067 result.s32[i] = 0;
2068 }
2069 result.s32[upper] = cvtsdsw(t, &sat);
2070 *r = result;
2071
2072 if (sat) {
2073 env->vscr |= (1 << VSCR_SAT);
2074 }
2075 }
2076
2077 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2078 {
2079 int i, j, upper;
2080 ppc_avr_t result;
2081 int sat = 0;
2082
2083 #if defined(HOST_WORDS_BIGENDIAN)
2084 upper = 1;
2085 #else
2086 upper = 0;
2087 #endif
2088 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2089 int64_t t = (int64_t)b->s32[upper + i * 2];
2090
2091 result.u64[i] = 0;
2092 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2093 t += a->s32[2 * i + j];
2094 }
2095 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2096 }
2097
2098 *r = result;
2099 if (sat) {
2100 env->vscr |= (1 << VSCR_SAT);
2101 }
2102 }
2103
2104 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2105 {
2106 int i, j;
2107 int sat = 0;
2108
2109 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2110 int64_t t = (int64_t)b->s32[i];
2111
2112 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2113 t += a->s8[4 * i + j];
2114 }
2115 r->s32[i] = cvtsdsw(t, &sat);
2116 }
2117
2118 if (sat) {
2119 env->vscr |= (1 << VSCR_SAT);
2120 }
2121 }
2122
2123 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2124 {
2125 int sat = 0;
2126 int i;
2127
2128 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2129 int64_t t = (int64_t)b->s32[i];
2130
2131 t += a->s16[2 * i] + a->s16[2 * i + 1];
2132 r->s32[i] = cvtsdsw(t, &sat);
2133 }
2134
2135 if (sat) {
2136 env->vscr |= (1 << VSCR_SAT);
2137 }
2138 }
2139
2140 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2141 {
2142 int i, j;
2143 int sat = 0;
2144
2145 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2146 uint64_t t = (uint64_t)b->u32[i];
2147
2148 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2149 t += a->u8[4 * i + j];
2150 }
2151 r->u32[i] = cvtuduw(t, &sat);
2152 }
2153
2154 if (sat) {
2155 env->vscr |= (1 << VSCR_SAT);
2156 }
2157 }
2158
2159 #if defined(HOST_WORDS_BIGENDIAN)
2160 #define UPKHI 1
2161 #define UPKLO 0
2162 #else
2163 #define UPKHI 0
2164 #define UPKLO 1
2165 #endif
2166 #define VUPKPX(suffix, hi) \
2167 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2168 { \
2169 int i; \
2170 ppc_avr_t result; \
2171 \
2172 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2173 uint16_t e = b->u16[hi ? i : i+4]; \
2174 uint8_t a = (e >> 15) ? 0xff : 0; \
2175 uint8_t r = (e >> 10) & 0x1f; \
2176 uint8_t g = (e >> 5) & 0x1f; \
2177 uint8_t b = e & 0x1f; \
2178 \
2179 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2180 } \
2181 *r = result; \
2182 }
2183 VUPKPX(lpx, UPKLO)
2184 VUPKPX(hpx, UPKHI)
2185 #undef VUPKPX
2186
2187 #define VUPK(suffix, unpacked, packee, hi) \
2188 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2189 { \
2190 int i; \
2191 ppc_avr_t result; \
2192 \
2193 if (hi) { \
2194 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2195 result.unpacked[i] = b->packee[i]; \
2196 } \
2197 } else { \
2198 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2199 i++) { \
2200 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2201 } \
2202 } \
2203 *r = result; \
2204 }
2205 VUPK(hsb, s16, s8, UPKHI)
2206 VUPK(hsh, s32, s16, UPKHI)
2207 VUPK(hsw, s64, s32, UPKHI)
2208 VUPK(lsb, s16, s8, UPKLO)
2209 VUPK(lsh, s32, s16, UPKLO)
2210 VUPK(lsw, s64, s32, UPKLO)
2211 #undef VUPK
2212 #undef UPKHI
2213 #undef UPKLO
2214
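/*
 * VGENERIC_DO expands a scalar primitive `name` into a helper that
 * applies it to every element of b, e.g. a per-byte count-leading-zeros
 * where clzb(0x10) yields 3 and clzb(0) yields 8 by convention.
 */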
2215 #define VGENERIC_DO(name, element) \
2216 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2217 { \
2218 int i; \
2219 \
2220 VECTOR_FOR_INORDER_I(i, element) { \
2221 r->element[i] = name(b->element[i]); \
2222 } \
2223 }
2224
2225 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2226 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2227 #define clzw(v) clz32((v))
2228 #define clzd(v) clz64((v))
2229
2230 VGENERIC_DO(clzb, u8)
2231 VGENERIC_DO(clzh, u16)
2232 VGENERIC_DO(clzw, u32)
2233 VGENERIC_DO(clzd, u64)
2234
2235 #undef clzb
2236 #undef clzh
2237 #undef clzw
2238 #undef clzd
2239
2240 #define ctzb(v) ((v) ? ctz32(v) : 8)
2241 #define ctzh(v) ((v) ? ctz32(v) : 16)
2242 #define ctzw(v) ctz32((v))
2243 #define ctzd(v) ctz64((v))
2244
2245 VGENERIC_DO(ctzb, u8)
2246 VGENERIC_DO(ctzh, u16)
2247 VGENERIC_DO(ctzw, u32)
2248 VGENERIC_DO(ctzd, u64)
2249
2250 #undef ctzb
2251 #undef ctzh
2252 #undef ctzw
2253 #undef ctzd
2254
2255 #define popcntb(v) ctpop8(v)
2256 #define popcnth(v) ctpop16(v)
2257 #define popcntw(v) ctpop32(v)
2258 #define popcntd(v) ctpop64(v)
2259
2260 VGENERIC_DO(popcntb, u8)
2261 VGENERIC_DO(popcnth, u16)
2262 VGENERIC_DO(popcntw, u32)
2263 VGENERIC_DO(popcntd, u64)
2264
2265 #undef popcntb
2266 #undef popcnth
2267 #undef popcntw
2268 #undef popcntd
2269
2270 #undef VGENERIC_DO
2271
2272 #if defined(HOST_WORDS_BIGENDIAN)
2273 #define QW_ONE { .u64 = { 0, 1 } }
2274 #else
2275 #define QW_ONE { .u64 = { 1, 0 } }
2276 #endif
2277
2278 #ifndef CONFIG_INT128
2279
2280 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2281 {
2282 t->u64[0] = ~a.u64[0];
2283 t->u64[1] = ~a.u64[1];
2284 }
2285
2286 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2287 {
2288 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2289 return -1;
2290 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2291 return 1;
2292 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2293 return -1;
2294 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2295 return 1;
2296 } else {
2297 return 0;
2298 }
2299 }
2300
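/*
 * For the software 128-bit add below, note that a 64-bit addition a + b
 * carries out iff b > UINT64_MAX - a, i.e. iff ~a < b; that is exactly
 * the (~a.u64[LO_IDX] < b.u64[LO_IDX]) term used to propagate the carry
 * from the low half into the high half.
 */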
2301 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2302 {
2303 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2304 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2305 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2306 }
2307
2308 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2309 {
2310 ppc_avr_t not_a;
2311 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2312 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2313 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2314 avr_qw_not(&not_a, a);
2315 return avr_qw_cmpu(not_a, b) < 0;
2316 }
2317
2318 #endif
2319
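/*
 * Quadword (128-bit) add/sub helpers.  When the compiler provides
 * __int128 (CONFIG_INT128) the operation is written directly on u128;
 * otherwise it falls back to the avr_qw_* routines above, which model a
 * 128-bit value as the {HI_IDX, LO_IDX} pair of u64 halves.  vaddcuq and
 * vsubcuq return only a carry bit (0 or 1) in the low doubleword, and
 * the *e* variants fold in the carry bit passed in c.
 */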
2320 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2321 {
2322 #ifdef CONFIG_INT128
2323 r->u128 = a->u128 + b->u128;
2324 #else
2325 avr_qw_add(r, *a, *b);
2326 #endif
2327 }
2328
2329 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2330 {
2331 #ifdef CONFIG_INT128
2332 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2333 #else
2334
2335 if (c->u64[LO_IDX] & 1) {
2336 ppc_avr_t tmp;
2337
2338 tmp.u64[HI_IDX] = 0;
2339 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2340 avr_qw_add(&tmp, *a, tmp);
2341 avr_qw_add(r, tmp, *b);
2342 } else {
2343 avr_qw_add(r, *a, *b);
2344 }
2345 #endif
2346 }
2347
2348 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2349 {
2350 #ifdef CONFIG_INT128
2351 r->u128 = (~a->u128 < b->u128);
2352 #else
2353 ppc_avr_t not_a;
2354
2355 avr_qw_not(&not_a, *a);
2356
2357 r->u64[HI_IDX] = 0;
2358 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2359 #endif
2360 }
2361
2362 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2363 {
2364 #ifdef CONFIG_INT128
2365 int carry_out = (~a->u128 < b->u128);
2366 if (!carry_out && (c->u128 & 1)) {
2367 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2368 ((a->u128 != 0) || (b->u128 != 0));
2369 }
2370 r->u128 = carry_out;
2371 #else
2372
2373 int carry_in = c->u64[LO_IDX] & 1;
2374 int carry_out = 0;
2375 ppc_avr_t tmp;
2376
2377 carry_out = avr_qw_addc(&tmp, *a, *b);
2378
2379 if (!carry_out && carry_in) {
2380 ppc_avr_t one = QW_ONE;
2381 carry_out = avr_qw_addc(&tmp, tmp, one);
2382 }
2383 r->u64[HI_IDX] = 0;
2384 r->u64[LO_IDX] = carry_out;
2385 #endif
2386 }
2387
2388 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2389 {
2390 #ifdef CONFIG_INT128
2391 r->u128 = a->u128 - b->u128;
2392 #else
2393 ppc_avr_t tmp;
2394 ppc_avr_t one = QW_ONE;
2395
2396 avr_qw_not(&tmp, *b);
2397 avr_qw_add(&tmp, *a, tmp);
2398 avr_qw_add(r, tmp, one);
2399 #endif
2400 }
2401
2402 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2403 {
2404 #ifdef CONFIG_INT128
2405 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2406 #else
2407 ppc_avr_t tmp, sum;
2408
2409 avr_qw_not(&tmp, *b);
2410 avr_qw_add(&sum, *a, tmp);
2411
2412 tmp.u64[HI_IDX] = 0;
2413 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2414 avr_qw_add(r, sum, tmp);
2415 #endif
2416 }
2417
2418 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2419 {
2420 #ifdef CONFIG_INT128
2421 r->u128 = (~a->u128 < ~b->u128) ||
2422 (a->u128 + ~b->u128 == (__uint128_t)-1);
2423 #else
2424 int carry = (avr_qw_cmpu(*a, *b) > 0);
2425 if (!carry) {
2426 ppc_avr_t tmp;
2427 avr_qw_not(&tmp, *b);
2428 avr_qw_add(&tmp, *a, tmp);
2429 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2430 }
2431 r->u64[HI_IDX] = 0;
2432 r->u64[LO_IDX] = carry;
2433 #endif
2434 }
2435
2436 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2437 {
2438 #ifdef CONFIG_INT128
2439 r->u128 =
2440 (~a->u128 < ~b->u128) ||
2441 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2442 #else
2443 int carry_in = c->u64[LO_IDX] & 1;
2444 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2445 if (!carry_out && carry_in) {
2446 ppc_avr_t tmp;
2447 avr_qw_not(&tmp, *b);
2448 avr_qw_add(&tmp, *a, tmp);
2449 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2450 }
2451
2452 r->u64[HI_IDX] = 0;
2453 r->u64[LO_IDX] = carry_out;
2454 #endif
2455 }
2456
2457 #define BCD_PLUS_PREF_1 0xC
2458 #define BCD_PLUS_PREF_2 0xF
2459 #define BCD_PLUS_ALT_1 0xA
2460 #define BCD_NEG_PREF 0xD
2461 #define BCD_NEG_ALT 0xB
2462 #define BCD_PLUS_ALT_2 0xE
2463 #define NATIONAL_PLUS 0x2B
2464 #define NATIONAL_NEG 0x2D
2465
2466 #if defined(HOST_WORDS_BIGENDIAN)
2467 #define BCD_DIG_BYTE(n) (15 - (n/2))
2468 #else
2469 #define BCD_DIG_BYTE(n) (n/2)
2470 #endif
2471
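/*
 * Packed BCD layout used by the helpers below (informal): the 128-bit
 * register holds 31 decimal digits plus a sign code, one nibble each.
 * Nibble 0 (the least significant nibble of the least significant byte)
 * is the sign; digit n for n = 1..31 is the n-th nibble counting up from
 * there, which BCD_DIG_BYTE() maps to the right byte for the host
 * endianness.  Sign codes 0xA, 0xC, 0xE and 0xF mean plus, 0xB and 0xD
 * mean minus; anything else is treated as invalid.
 */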
2472 static int bcd_get_sgn(ppc_avr_t *bcd)
2473 {
2474 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2475 case BCD_PLUS_PREF_1:
2476 case BCD_PLUS_PREF_2:
2477 case BCD_PLUS_ALT_1:
2478 case BCD_PLUS_ALT_2:
2479 {
2480 return 1;
2481 }
2482
2483 case BCD_NEG_PREF:
2484 case BCD_NEG_ALT:
2485 {
2486 return -1;
2487 }
2488
2489 default:
2490 {
2491 return 0;
2492 }
2493 }
2494 }
2495
2496 static int bcd_preferred_sgn(int sgn, int ps)
2497 {
2498 if (sgn >= 0) {
2499 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2500 } else {
2501 return BCD_NEG_PREF;
2502 }
2503 }
2504
2505 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2506 {
2507 uint8_t result;
2508 if (n & 1) {
2509 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2510 } else {
2511 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2512 }
2513
2514 if (unlikely(result > 9)) {
2515 *invalid = true;
2516 }
2517 return result;
2518 }
2519
2520 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2521 {
2522 if (n & 1) {
2523 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2524 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2525 } else {
2526 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2527 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2528 }
2529 }
2530
2531 static int bcd_cmp_zero(ppc_avr_t *bcd)
2532 {
2533 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2534 return 1 << CRF_EQ;
2535 } else {
2536 return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
2537 }
2538 }
2539
2540 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2541 {
2542 #if defined(HOST_WORDS_BIGENDIAN)
2543 return reg->u16[7 - n];
2544 #else
2545 return reg->u16[n];
2546 #endif
2547 }
2548
2549 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2550 {
2551 #if defined(HOST_WORDS_BIGENDIAN)
2552 reg->u16[7 - n] = val;
2553 #else
2554 reg->u16[n] = val;
2555 #endif
2556 }
2557
2558 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2559 {
2560 int i;
2561 int invalid = 0;
2562 for (i = 31; i > 0; i--) {
2563 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2564 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2565 if (unlikely(invalid)) {
2566 return 0; /* doesn't matter */
2567 } else if (dig_a > dig_b) {
2568 return 1;
2569 } else if (dig_a < dig_b) {
2570 return -1;
2571 }
2572 }
2573
2574 return 0;
2575 }
2576
2577 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2578 int *overflow)
2579 {
2580 int carry = 0;
2581 int i;
2582 int is_zero = 1;
2583 for (i = 1; i <= 31; i++) {
2584 uint8_t digit = bcd_get_digit(a, i, invalid) +
2585 bcd_get_digit(b, i, invalid) + carry;
2586 is_zero &= (digit == 0);
2587 if (digit > 9) {
2588 carry = 1;
2589 digit -= 10;
2590 } else {
2591 carry = 0;
2592 }
2593
2594 bcd_put_digit(t, digit, i);
2595
2596 if (unlikely(*invalid)) {
2597 return -1;
2598 }
2599 }
2600
2601 *overflow = carry;
2602 return is_zero;
2603 }
2604
2605 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2606 int *overflow)
2607 {
2608 int carry = 0;
2609 int i;
2610 int is_zero = 1;
2611 for (i = 1; i <= 31; i++) {
2612 uint8_t digit = bcd_get_digit(a, i, invalid) -
2613 bcd_get_digit(b, i, invalid) + carry;
2614 is_zero &= (digit == 0);
2615 if (digit & 0x80) {
2616 carry = -1;
2617 digit += 10;
2618 } else {
2619 carry = 0;
2620 }
2621
2622 bcd_put_digit(t, digit, i);
2623
2624 if (unlikely(*invalid)) {
2625 return -1;
2626 }
2627 }
2628
2629 *overflow = carry;
2630 return is_zero;
2631 }
2632
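/*
 * bcdadd (sketch): classic sign-and-magnitude decimal addition.  If the
 * signs agree the magnitudes are added and the common sign is kept; if
 * they differ the smaller magnitude is subtracted from the larger and
 * the sign of the larger operand wins.  The returned CR field encodes
 * LT/GT for a negative/positive result, EQ for zero, and SO for either
 * an invalid operand (result forced to all ones) or a magnitude
 * overflow.
 */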
2633 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2634 {
2635
2636 int sgna = bcd_get_sgn(a);
2637 int sgnb = bcd_get_sgn(b);
2638 int invalid = (sgna == 0) || (sgnb == 0);
2639 int overflow = 0;
2640 int zero = 0;
2641 uint32_t cr = 0;
2642 ppc_avr_t result = { .u64 = { 0, 0 } };
2643
2644 if (!invalid) {
2645 if (sgna == sgnb) {
2646 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2647 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2648 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2649 } else if (bcd_cmp_mag(a, b) > 0) {
2650 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2651 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2652 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2653 } else {
2654 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2655 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2656 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2657 }
2658 }
2659
2660 if (unlikely(invalid)) {
2661 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2662 cr = 1 << CRF_SO;
2663 } else if (overflow) {
2664 cr |= 1 << CRF_SO;
2665 } else if (zero) {
2666 cr = 1 << CRF_EQ;
2667 }
2668
2669 *r = result;
2670
2671 return cr;
2672 }
2673
2674 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2675 {
2676 ppc_avr_t bcopy = *b;
2677 int sgnb = bcd_get_sgn(b);
2678 if (sgnb < 0) {
2679 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2680 } else if (sgnb > 0) {
2681 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2682 }
2683 /* else invalid ... defer to bcdadd code for proper handling */
2684
2685 return helper_bcdadd(r, a, &bcopy, ps);
2686 }
2687
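/*
 * bcdcfn/bcdctn convert between packed BCD and the "national" decimal
 * format, in which each digit occupies a 16-bit element holding the
 * character codes 0x30..0x39 and element 0 holds the sign character,
 * 0x2B ('+') or 0x2D ('-').  Only seven digits fit, so bcdctn flags
 * overflow (SO) when the source has significant digits above that
 * range.
 */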
2688 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2689 {
2690 int i;
2691 int cr = 0;
2692 uint16_t national = 0;
2693 uint16_t sgnb = get_national_digit(b, 0);
2694 ppc_avr_t ret = { .u64 = { 0, 0 } };
2695 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2696
2697 for (i = 1; i < 8; i++) {
2698 national = get_national_digit(b, i);
2699 if (unlikely(national < 0x30 || national > 0x39)) {
2700 invalid = 1;
2701 break;
2702 }
2703
2704 bcd_put_digit(&ret, national & 0xf, i);
2705 }
2706
2707 if (sgnb == NATIONAL_PLUS) {
2708 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2709 } else {
2710 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2711 }
2712
2713 cr = bcd_cmp_zero(&ret);
2714
2715 if (unlikely(invalid)) {
2716 cr = 1 << CRF_SO;
2717 }
2718
2719 *r = ret;
2720
2721 return cr;
2722 }
2723
2724 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2725 {
2726 int i;
2727 int cr = 0;
2728 int sgnb = bcd_get_sgn(b);
2729 int invalid = (sgnb == 0);
2730 ppc_avr_t ret = { .u64 = { 0, 0 } };
2731
2732 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2733
2734 for (i = 1; i < 8; i++) {
2735 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2736
2737 if (unlikely(invalid)) {
2738 break;
2739 }
2740 }
2741 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2742
2743 cr = bcd_cmp_zero(b);
2744
2745 if (ox_flag) {
2746 cr |= 1 << CRF_SO;
2747 }
2748
2749 if (unlikely(invalid)) {
2750 cr = 1 << CRF_SO;
2751 }
2752
2753 *r = ret;
2754
2755 return cr;
2756 }
2757
2758 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2759 {
2760 int i;
2761 int cr = 0;
2762 int invalid = 0;
2763 int zone_digit = 0;
2764 int zone_lead = ps ? 0xF : 0x3;
2765 int digit = 0;
2766 ppc_avr_t ret = { .u64 = { 0, 0 } };
2767 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2768
2769 if (unlikely((sgnb < 0xA) && ps)) {
2770 invalid = 1;
2771 }
2772
2773 for (i = 0; i < 16; i++) {
2774 zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2775 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2776 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2777 invalid = 1;
2778 break;
2779 }
2780
2781 bcd_put_digit(&ret, digit, i + 1);
2782 }
2783
2784 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2785 (!ps && (sgnb & 0x4))) {
2786 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2787 } else {
2788 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2789 }
2790
2791 cr = bcd_cmp_zero(&ret);
2792
2793 if (unlikely(invalid)) {
2794 cr = 1 << CRF_SO;
2795 }
2796
2797 *r = ret;
2798
2799 return cr;
2800 }
2801
2802 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2803 {
2804 int i;
2805 int cr = 0;
2806 uint8_t digit = 0;
2807 int sgnb = bcd_get_sgn(b);
2808 int zone_lead = (ps) ? 0xF0 : 0x30;
2809 int invalid = (sgnb == 0);
2810 ppc_avr_t ret = { .u64 = { 0, 0 } };
2811
2812 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2813
2814 for (i = 0; i < 16; i++) {
2815 digit = bcd_get_digit(b, i + 1, &invalid);
2816
2817 if (unlikely(invalid)) {
2818 break;
2819 }
2820
2821 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2822 }
2823
2824 if (ps) {
2825 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2826 } else {
2827 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2828 }
2829
2830 cr = bcd_cmp_zero(b);
2831
2832 if (ox_flag) {
2833 cr |= 1 << CRF_SO;
2834 }
2835
2836 if (unlikely(invalid)) {
2837 cr = 1 << CRF_SO;
2838 }
2839
2840 *r = ret;
2841
2842 return cr;
2843 }
2844
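/*
 * AES acceleration helpers (sketch): vsbox applies the AES SubBytes
 * S-box to every byte; vcipher performs one full encryption round
 * (ShiftRows, SubBytes and MixColumns folded into the AES_Te* tables,
 * then XOR with the round key in b); vcipherlast is the final round
 * without MixColumns; vncipher/vncipherlast are the corresponding
 * decryption rounds built from the inverse S-box and AES_imc tables.
 */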
2845 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2846 {
2847 int i;
2848 VECTOR_FOR_INORDER_I(i, u8) {
2849 r->u8[i] = AES_sbox[a->u8[i]];
2850 }
2851 }
2852
2853 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2854 {
2855 ppc_avr_t result;
2856 int i;
2857
2858 VECTOR_FOR_INORDER_I(i, u32) {
2859 result.AVRW(i) = b->AVRW(i) ^
2860 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2861 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2862 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2863 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2864 }
2865 *r = result;
2866 }
2867
2868 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2869 {
2870 ppc_avr_t result;
2871 int i;
2872
2873 VECTOR_FOR_INORDER_I(i, u8) {
2874 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2875 }
2876 *r = result;
2877 }
2878
2879 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2880 {
2881 /* This differs from what is written in ISA V2.07. The RTL is */
2882 /* incorrect and will be fixed in V2.07B. */
2883 int i;
2884 ppc_avr_t tmp;
2885
2886 VECTOR_FOR_INORDER_I(i, u8) {
2887 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2888 }
2889
2890 VECTOR_FOR_INORDER_I(i, u32) {
2891 r->AVRW(i) =
2892 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2893 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2894 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2895 AES_imc[tmp.AVRB(4*i + 3)][3];
2896 }
2897 }
2898
2899 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2900 {
2901 ppc_avr_t result;
2902 int i;
2903
2904 VECTOR_FOR_INORDER_I(i, u8) {
2905 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2906 }
2907 *r = result;
2908 }
2909
2910 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2911 #if defined(HOST_WORDS_BIGENDIAN)
2912 #define EL_IDX(i) (i)
2913 #else
2914 #define EL_IDX(i) (3 - (i))
2915 #endif
2916
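/*
 * vshasigmaw/vshasigmad compute the SHA-256 and SHA-512 sigma functions.
 * The st bit of st_six selects the lower-case sigma functions (message
 * schedule) versus the upper-case Sigma functions (compression), and the
 * bits of six select sigma0/Sigma0 versus sigma1/Sigma1 per element; the
 * rotate and shift amounts below are the standard FIPS 180-4 constants,
 * e.g. sigma0(x) = ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3) for SHA-256.
 */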
2917 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2918 {
2919 int st = (st_six & 0x10) != 0;
2920 int six = st_six & 0xF;
2921 int i;
2922
2923 VECTOR_FOR_INORDER_I(i, u32) {
2924 if (st == 0) {
2925 if ((six & (0x8 >> i)) == 0) {
2926 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2927 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2928 (a->u32[EL_IDX(i)] >> 3);
2929 } else { /* six.bit[i] == 1 */
2930 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2931 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2932 (a->u32[EL_IDX(i)] >> 10);
2933 }
2934 } else { /* st == 1 */
2935 if ((six & (0x8 >> i)) == 0) {
2936 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2937 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2938 ROTRu32(a->u32[EL_IDX(i)], 22);
2939 } else { /* six.bit[i] == 1 */
2940 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2941 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2942 ROTRu32(a->u32[EL_IDX(i)], 25);
2943 }
2944 }
2945 }
2946 }
2947
2948 #undef ROTRu32
2949 #undef EL_IDX
2950
2951 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2952 #if defined(HOST_WORDS_BIGENDIAN)
2953 #define EL_IDX(i) (i)
2954 #else
2955 #define EL_IDX(i) (1 - (i))
2956 #endif
2957
2958 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2959 {
2960 int st = (st_six & 0x10) != 0;
2961 int six = st_six & 0xF;
2962 int i;
2963
2964 VECTOR_FOR_INORDER_I(i, u64) {
2965 if (st == 0) {
2966 if ((six & (0x8 >> (2*i))) == 0) {
2967 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2968 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2969 (a->u64[EL_IDX(i)] >> 7);
2970 } else { /* six.bit[2*i] == 1 */
2971 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2972 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2973 (a->u64[EL_IDX(i)] >> 6);
2974 }
2975 } else { /* st == 1 */
2976 if ((six & (0x8 >> (2*i))) == 0) {
2977 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2978 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2979 ROTRu64(a->u64[EL_IDX(i)], 39);
2980 } else { /* six.bit[2*i] == 1 */
2981 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2982 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2983 ROTRu64(a->u64[EL_IDX(i)], 41);
2984 }
2985 }
2986 }
2987 }
2988
2989 #undef ROTRu64
2990 #undef EL_IDX
2991
2992 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2993 {
2994 ppc_avr_t result;
2995 int i;
2996
2997 VECTOR_FOR_INORDER_I(i, u8) {
2998 int indexA = c->u8[i] >> 4;
2999 int indexB = c->u8[i] & 0xF;
3000 #if defined(HOST_WORDS_BIGENDIAN)
3001 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3002 #else
3003 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3004 #endif
3005 }
3006 *r = result;
3007 }
3008
3009 #undef VECTOR_FOR_INORDER_I
3010 #undef HI_IDX
3011 #undef LO_IDX
3012
3013 /*****************************************************************************/
3014 /* SPE extension helpers */
3015 /* Use a table to make this quicker */
3016 static const uint8_t hbrev[16] = {
3017 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3018 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3019 };
3020
3021 static inline uint8_t byte_reverse(uint8_t val)
3022 {
3023 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3024 }
3025
3026 static inline uint32_t word_reverse(uint32_t val)
3027 {
3028 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3029 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3030 }
3031
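/*
 * brinc (sketch): bit-reversed increment, used for FFT-style addressing.
 * Within the bits of arg1 selected by the mask in arg2, the index is
 * incremented as if its bits were reversed, i.e. the carry ripples from
 * the most significant masked bit towards the least significant one.
 * With a 3-bit mask (arg2 = 7) starting from 0 the sequence is
 * 0, 4, 2, 6, 1, 5, 3, 7.
 */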
3032 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3033 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3034 {
3035 uint32_t a, b, d, mask;
3036
3037 mask = UINT32_MAX >> (32 - MASKBITS);
3038 a = arg1 & mask;
3039 b = arg2 & mask;
3040 d = word_reverse(1 + word_reverse(a | ~b));
3041 return (arg1 & ~mask) | (d & b);
3042 }
3043
3044 uint32_t helper_cntlsw32(uint32_t val)
3045 {
3046 if (val & 0x80000000) {
3047 return clz32(~val);
3048 } else {
3049 return clz32(val);
3050 }
3051 }
3052
3053 uint32_t helper_cntlzw32(uint32_t val)
3054 {
3055 return clz32(val);
3056 }
3057
3058 /* 440 specific */
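/*
 * dlmzb (sketch): "determine leftmost zero byte" across the 8 bytes
 * formed by high:low.  The value written to XER (and returned) is the
 * 1-based position of the first zero byte, scanning from the most
 * significant byte of `high`, or 8 when no byte is zero; with Rc set,
 * CR0 additionally records whether the zero byte was found in the high
 * word, the low word, or not at all.
 */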
3059 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3060 target_ulong low, uint32_t update_Rc)
3061 {
3062 target_ulong mask;
3063 int i;
3064
3065 i = 1;
3066 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3067 if ((high & mask) == 0) {
3068 if (update_Rc) {
3069 env->crf[0] = 0x4;
3070 }
3071 goto done;
3072 }
3073 i++;
3074 }
3075 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3076 if ((low & mask) == 0) {
3077 if (update_Rc) {
3078 env->crf[0] = 0x8;
3079 }
3080 goto done;
3081 }
3082 i++;
3083 }
3084 i = 8;
3085 if (update_Rc) {
3086 env->crf[0] = 0x2;
3087 }
3088 done:
3089 env->xer = (env->xer & ~0x7F) | i;
3090 if (update_Rc) {
3091 env->crf[0] |= xer_so;
3092 }
3093 return i;
3094 }