1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/exec-all.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
25
26 #include "helper_regs.h"
27 /*****************************************************************************/
28 /* Fixed point operations helpers */
29
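/* divweu: Divide Word Extended Unsigned.  Divides (ra || 32 zero bits) by
 * the low 32 bits of rb as a 64-bit unsigned division; the result is
 * undefined (0 here) when the divisor is zero or the quotient does not
 * fit in 32 bits. */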
30 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
31 uint32_t oe)
32 {
33 uint64_t rt = 0;
34 int overflow = 0;
35
36 uint64_t dividend = (uint64_t)ra << 32;
37 uint64_t divisor = (uint32_t)rb;
38
39 if (unlikely(divisor == 0)) {
40 overflow = 1;
41 } else {
42 rt = dividend / divisor;
43 overflow = rt > UINT32_MAX;
44 }
45
46 if (unlikely(overflow)) {
47 rt = 0; /* Undefined */
48 }
49
50 if (oe) {
51 if (unlikely(overflow)) {
52 env->so = env->ov = 1;
53 } else {
54 env->ov = 0;
55 }
56 }
57
58 return (target_ulong)rt;
59 }
60
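/* divwe: signed counterpart of divweu.  Overflow when the divisor is zero,
 * when dividing INT64_MIN by -1, or when the quotient does not fit in a
 * signed 32-bit word. */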
61 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
62 uint32_t oe)
63 {
64 int64_t rt = 0;
65 int overflow = 0;
66
67 int64_t dividend = (int64_t)ra << 32;
68 int64_t divisor = (int64_t)((int32_t)rb);
69
70 if (unlikely((divisor == 0) ||
71 ((divisor == -1) && (dividend == INT64_MIN)))) {
72 overflow = 1;
73 } else {
74 rt = dividend / divisor;
75 overflow = rt != (int32_t)rt;
76 }
77
78 if (unlikely(overflow)) {
79 rt = 0; /* Undefined */
80 }
81
82 if (oe) {
83 if (unlikely(overflow)) {
84 env->so = env->ov = 1;
85 } else {
86 env->ov = 0;
87 }
88 }
89
90 return (target_ulong)rt;
91 }
92
93 #if defined(TARGET_PPC64)
94
95 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
96 {
97 uint64_t rt = 0;
98 int overflow = 0;
99
100 overflow = divu128(&rt, &ra, rb);
101
102 if (unlikely(overflow)) {
103 rt = 0; /* Undefined */
104 }
105
106 if (oe) {
107 if (unlikely(overflow)) {
108 env->so = env->ov = 1;
109 } else {
110 env->ov = 0;
111 }
112 }
113
114 return rt;
115 }
116
117 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
118 {
119 int64_t rt = 0;
120 int64_t ra = (int64_t)rau;
121 int64_t rb = (int64_t)rbu;
122 int overflow = divs128(&rt, &ra, rb);
123
124 if (unlikely(overflow)) {
125 rt = 0; /* Undefined */
126 }
127
128 if (oe) {
129
130 if (unlikely(overflow)) {
131 env->so = env->ov = 1;
132 } else {
133 env->ov = 0;
134 }
135 }
136
137 return rt;
138 }
139
140 #endif
141
142
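/* Count leading/trailing zero helpers backing cntlzw, cntlzd and cnttzd[.]. */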
143 target_ulong helper_cntlzw(target_ulong t)
144 {
145 return clz32(t);
146 }
147
148 #if defined(TARGET_PPC64)
149 target_ulong helper_cntlzd(target_ulong t)
150 {
151 return clz64(t);
152 }
153
154 target_ulong helper_cnttzd(target_ulong t)
155 {
156 return ctz64(t);
157 }
158 #endif
159
160 #if defined(TARGET_PPC64)
161
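/* bpermd: Bit Permute Doubleword.  Each of the 8 low-order result bits is
 * the bit of rb selected (in big-endian bit numbering) by the corresponding
 * byte of rs; indexes of 64 or more produce a zero bit. */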
162 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
163 {
164 int i;
165 uint64_t ra = 0;
166
167 for (i = 0; i < 8; i++) {
168 int index = (rs >> (i*8)) & 0xFF;
169 if (index < 64) {
170 if (rb & (1ull << (63-index))) {
171 ra |= 1 << i;
172 }
173 }
174 }
175 return ra;
176 }
177
178 #endif
179
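/* cmpb: Compare Bytes.  Each result byte is 0xff where the corresponding
 * bytes of rs and rb are equal, 0x00 otherwise. */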
180 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
181 {
182 target_ulong mask = 0xff;
183 target_ulong ra = 0;
184 int i;
185
186 for (i = 0; i < sizeof(target_ulong); i++) {
187 if ((rs & mask) == (rb & mask)) {
188 ra |= mask;
189 }
190 mask <<= 8;
191 }
192 return ra;
193 }
194
195 /* shift right arithmetic helper */
196 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
197 target_ulong shift)
198 {
199 int32_t ret;
200
201 if (likely(!(shift & 0x20))) {
202 if (likely((uint32_t)shift != 0)) {
203 shift &= 0x1f;
204 ret = (int32_t)value >> shift;
205 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
206 env->ca = 0;
207 } else {
208 env->ca = 1;
209 }
210 } else {
211 ret = (int32_t)value;
212 env->ca = 0;
213 }
214 } else {
215 ret = (int32_t)value >> 31;
216 env->ca = (ret != 0);
217 }
218 return (target_long)ret;
219 }
220
221 #if defined(TARGET_PPC64)
222 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
223 target_ulong shift)
224 {
225 int64_t ret;
226
227 if (likely(!(shift & 0x40))) {
228 if (likely((uint64_t)shift != 0)) {
229 shift &= 0x3f;
230 ret = (int64_t)value >> shift;
231 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
232 env->ca = 0;
233 } else {
234 env->ca = 1;
235 }
236 } else {
237 ret = (int64_t)value;
238 env->ca = 0;
239 }
240 } else {
241 ret = (int64_t)value >> 63;
242 env->ca = (ret != 0);
243 }
244 return ret;
245 }
246 #endif
247
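/* Population count helpers: bit-slice adders that accumulate counts within
 * each byte (popcntb) or word (popcntw) in place; popcntd uses ctpop64. */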
248 #if defined(TARGET_PPC64)
249 target_ulong helper_popcntb(target_ulong val)
250 {
251 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
252 0x5555555555555555ULL);
253 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
254 0x3333333333333333ULL);
255 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
256 0x0f0f0f0f0f0f0f0fULL);
257 return val;
258 }
259
260 target_ulong helper_popcntw(target_ulong val)
261 {
262 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
263 0x5555555555555555ULL);
264 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
265 0x3333333333333333ULL);
266 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
267 0x0f0f0f0f0f0f0f0fULL);
268 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
269 0x00ff00ff00ff00ffULL);
270 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
271 0x0000ffff0000ffffULL);
272 return val;
273 }
274
275 target_ulong helper_popcntd(target_ulong val)
276 {
277 return ctpop64(val);
278 }
279 #else
280 target_ulong helper_popcntb(target_ulong val)
281 {
282 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
283 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
284 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
285 return val;
286 }
287
288 target_ulong helper_popcntw(target_ulong val)
289 {
290 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
291 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
292 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
293 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
294 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
295 return val;
296 }
297 #endif
298
299 /*****************************************************************************/
300 /* PowerPC 601 specific instructions (POWER bridge) */
301 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
302 {
303 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
304
305 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
306 (int32_t)arg2 == 0) {
307 env->spr[SPR_MQ] = 0;
308 return INT32_MIN;
309 } else {
310 env->spr[SPR_MQ] = tmp % arg2;
311 return tmp / (int32_t)arg2;
312 }
313 }
314
315 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
316 target_ulong arg2)
317 {
318 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
319
320 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
321 (int32_t)arg2 == 0) {
322 env->so = env->ov = 1;
323 env->spr[SPR_MQ] = 0;
324 return INT32_MIN;
325 } else {
326 env->spr[SPR_MQ] = tmp % arg2;
327 tmp /= (int32_t)arg2;
328 if ((int32_t)tmp != tmp) {
329 env->so = env->ov = 1;
330 } else {
331 env->ov = 0;
332 }
333 return tmp;
334 }
335 }
336
337 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
338 target_ulong arg2)
339 {
340 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
341 (int32_t)arg2 == 0) {
342 env->spr[SPR_MQ] = 0;
343 return INT32_MIN;
344 } else {
345 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
346 return (int32_t)arg1 / (int32_t)arg2;
347 }
348 }
349
350 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
351 target_ulong arg2)
352 {
353 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
354 (int32_t)arg2 == 0) {
355 env->so = env->ov = 1;
356 env->spr[SPR_MQ] = 0;
357 return INT32_MIN;
358 } else {
359 env->ov = 0;
360 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
361 return (int32_t)arg1 / (int32_t)arg2;
362 }
363 }
364
365 /*****************************************************************************/
366 /* 602 specific instructions */
367 /* mfrom is the most crazy instruction ever seen, imho ! */
368 /* Real implementation uses a ROM table. Do the same */
369 /* Extremely decomposed:
370  *
371  * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
372  */
373 #if !defined(CONFIG_USER_ONLY)
374 target_ulong helper_602_mfrom(target_ulong arg)
375 {
376 if (likely(arg < 602)) {
377 #include "mfrom_table.c"
378 return mfrom_ROM_table[arg];
379 } else {
380 return 0;
381 }
382 }
383 #endif
384
385 /*****************************************************************************/
386 /* Altivec extension helpers */
387 #if defined(HOST_WORDS_BIGENDIAN)
388 #define HI_IDX 0
389 #define LO_IDX 1
390 #define AVRB(i) u8[i]
391 #define AVRW(i) u32[i]
392 #else
393 #define HI_IDX 1
394 #define LO_IDX 0
395 #define AVRB(i) u8[15-(i)]
396 #define AVRW(i) u32[3-(i)]
397 #endif
398
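/* Iterate over the elements of a vector in PowerPC (big-endian) element
 * order, independent of the host byte order. */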
399 #if defined(HOST_WORDS_BIGENDIAN)
400 #define VECTOR_FOR_INORDER_I(index, element) \
401 for (index = 0; index < ARRAY_SIZE(r->element); index++)
402 #else
403 #define VECTOR_FOR_INORDER_I(index, element) \
404 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
405 #endif
406
407 /* Saturating arithmetic helpers. */
408 #define SATCVT(from, to, from_type, to_type, min, max) \
409 static inline to_type cvt##from##to(from_type x, int *sat) \
410 { \
411 to_type r; \
412 \
413 if (x < (from_type)min) { \
414 r = min; \
415 *sat = 1; \
416 } else if (x > (from_type)max) { \
417 r = max; \
418 *sat = 1; \
419 } else { \
420 r = x; \
421 } \
422 return r; \
423 }
424 #define SATCVTU(from, to, from_type, to_type, min, max) \
425 static inline to_type cvt##from##to(from_type x, int *sat) \
426 { \
427 to_type r; \
428 \
429 if (x > (from_type)max) { \
430 r = max; \
431 *sat = 1; \
432 } else { \
433 r = x; \
434 } \
435 return r; \
436 }
437 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
438 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
439 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
440
441 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
442 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
443 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
444 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
445 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
446 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
447 #undef SATCVT
448 #undef SATCVTU
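/* As an illustration, SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
 * above expands to a cvtshsb() helper that clamps an int16_t into the
 * int8_t range and sets *sat whenever clamping occurred. */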
449
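/* lvsl/lvsr: build the permute control vector (consecutive byte indexes
 * starting at sh & 0xf for lvsl, 0x10 - (sh & 0xf) for lvsr) that is
 * typically fed to vperm for unaligned vector accesses. */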
450 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
451 {
452 int i, j = (sh & 0xf);
453
454 VECTOR_FOR_INORDER_I(i, u8) {
455 r->u8[i] = j++;
456 }
457 }
458
459 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
460 {
461 int i, j = 0x10 - (sh & 0xf);
462
463 VECTOR_FOR_INORDER_I(i, u8) {
464 r->u8[i] = j++;
465 }
466 }
467
468 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
469 {
470 #if defined(HOST_WORDS_BIGENDIAN)
471 env->vscr = r->u32[3];
472 #else
473 env->vscr = r->u32[0];
474 #endif
475 set_flush_to_zero(vscr_nj, &env->vec_status);
476 }
477
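/* vaddcuw: write out the carry of each unsigned 32-bit addition
 * (a + b carries iff b > ~a). */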
478 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
479 {
480 int i;
481
482 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
483 r->u32[i] = ~a->u32[i] < b->u32[i];
484 }
485 }
486
487 #define VARITH_DO(name, op, element) \
488 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
489 { \
490 int i; \
491 \
492 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
493 r->element[i] = a->element[i] op b->element[i]; \
494 } \
495 }
496 #define VARITH(suffix, element) \
497 VARITH_DO(add##suffix, +, element) \
498 VARITH_DO(sub##suffix, -, element)
499 VARITH(ubm, u8)
500 VARITH(uhm, u16)
501 VARITH(uwm, u32)
502 VARITH(udm, u64)
503 VARITH_DO(muluwm, *, u32)
504 #undef VARITH_DO
505 #undef VARITH
506
507 #define VARITHFP(suffix, func) \
508 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
509 ppc_avr_t *b) \
510 { \
511 int i; \
512 \
513 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
514 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
515 } \
516 }
517 VARITHFP(addfp, float32_add)
518 VARITHFP(subfp, float32_sub)
519 VARITHFP(minfp, float32_min)
520 VARITHFP(maxfp, float32_max)
521 #undef VARITHFP
522
523 #define VARITHFPFMA(suffix, type) \
524 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
525 ppc_avr_t *b, ppc_avr_t *c) \
526 { \
527 int i; \
528 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
529 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
530 type, &env->vec_status); \
531 } \
532 }
533 VARITHFPFMA(maddfp, 0);
534 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
535 #undef VARITHFPFMA
536
537 #define VARITHSAT_CASE(type, op, cvt, element) \
538 { \
539 type result = (type)a->element[i] op (type)b->element[i]; \
540 r->element[i] = cvt(result, &sat); \
541 }
542
543 #define VARITHSAT_DO(name, op, optype, cvt, element) \
544 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
545 ppc_avr_t *b) \
546 { \
547 int sat = 0; \
548 int i; \
549 \
550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
551 switch (sizeof(r->element[0])) { \
552 case 1: \
553 VARITHSAT_CASE(optype, op, cvt, element); \
554 break; \
555 case 2: \
556 VARITHSAT_CASE(optype, op, cvt, element); \
557 break; \
558 case 4: \
559 VARITHSAT_CASE(optype, op, cvt, element); \
560 break; \
561 } \
562 } \
563 if (sat) { \
564 env->vscr |= (1 << VSCR_SAT); \
565 } \
566 }
567 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
568 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
569 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
570 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
571 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
572 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
573 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
574 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
575 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
576 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
577 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
578 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
579 #undef VARITHSAT_CASE
580 #undef VARITHSAT_DO
581 #undef VARITHSAT_SIGNED
582 #undef VARITHSAT_UNSIGNED
583
584 #define VAVG_DO(name, element, etype) \
585 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
586 { \
587 int i; \
588 \
589 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
590 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
591 r->element[i] = x >> 1; \
592 } \
593 }
594
595 #define VAVG(type, signed_element, signed_type, unsigned_element, \
596 unsigned_type) \
597 VAVG_DO(avgs##type, signed_element, signed_type) \
598 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
599 VAVG(b, s8, int16_t, u8, uint16_t)
600 VAVG(h, s16, int32_t, u16, uint32_t)
601 VAVG(w, s32, int64_t, u32, uint64_t)
602 #undef VAVG_DO
603 #undef VAVG
604
605 #define VCF(suffix, cvt, element) \
606 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
607 ppc_avr_t *b, uint32_t uim) \
608 { \
609 int i; \
610 \
611 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
612 float32 t = cvt(b->element[i], &env->vec_status); \
613 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
614 } \
615 }
616 VCF(ux, uint32_to_float32, u32)
617 VCF(sx, int32_to_float32, s32)
618 #undef VCF
619
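/* Vector integer compares: each element becomes all ones when the predicate
 * holds, zero otherwise.  The record (dot) forms also set CR6 to reflect
 * "all elements true" and "no element true". */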
620 #define VCMP_DO(suffix, compare, element, record) \
621 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
622 ppc_avr_t *a, ppc_avr_t *b) \
623 { \
624 uint64_t ones = (uint64_t)-1; \
625 uint64_t all = ones; \
626 uint64_t none = 0; \
627 int i; \
628 \
629 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
630 uint64_t result = (a->element[i] compare b->element[i] ? \
631 ones : 0x0); \
632 switch (sizeof(a->element[0])) { \
633 case 8: \
634 r->u64[i] = result; \
635 break; \
636 case 4: \
637 r->u32[i] = result; \
638 break; \
639 case 2: \
640 r->u16[i] = result; \
641 break; \
642 case 1: \
643 r->u8[i] = result; \
644 break; \
645 } \
646 all &= result; \
647 none |= result; \
648 } \
649 if (record) { \
650 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
651 } \
652 }
653 #define VCMP(suffix, compare, element) \
654 VCMP_DO(suffix, compare, element, 0) \
655 VCMP_DO(suffix##_dot, compare, element, 1)
656 VCMP(equb, ==, u8)
657 VCMP(equh, ==, u16)
658 VCMP(equw, ==, u32)
659 VCMP(equd, ==, u64)
660 VCMP(gtub, >, u8)
661 VCMP(gtuh, >, u16)
662 VCMP(gtuw, >, u32)
663 VCMP(gtud, >, u64)
664 VCMP(gtsb, >, s8)
665 VCMP(gtsh, >, s16)
666 VCMP(gtsw, >, s32)
667 VCMP(gtsd, >, s64)
668 #undef VCMP_DO
669 #undef VCMP
670
671 #define VCMPFP_DO(suffix, compare, order, record) \
672 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
673 ppc_avr_t *a, ppc_avr_t *b) \
674 { \
675 uint32_t ones = (uint32_t)-1; \
676 uint32_t all = ones; \
677 uint32_t none = 0; \
678 int i; \
679 \
680 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
681 uint32_t result; \
682 int rel = float32_compare_quiet(a->f[i], b->f[i], \
683 &env->vec_status); \
684 if (rel == float_relation_unordered) { \
685 result = 0; \
686 } else if (rel compare order) { \
687 result = ones; \
688 } else { \
689 result = 0; \
690 } \
691 r->u32[i] = result; \
692 all &= result; \
693 none |= result; \
694 } \
695 if (record) { \
696 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
697 } \
698 }
699 #define VCMPFP(suffix, compare, order) \
700 VCMPFP_DO(suffix, compare, order, 0) \
701 VCMPFP_DO(suffix##_dot, compare, order, 1)
702 VCMPFP(eqfp, ==, float_relation_equal)
703 VCMPFP(gefp, !=, float_relation_less)
704 VCMPFP(gtfp, ==, float_relation_greater)
705 #undef VCMPFP_DO
706 #undef VCMPFP
707
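/* vcmpbfp: bounds compare.  Bit 31 of each result word is set when
 * a > b and bit 30 when a < -b; a NaN operand sets both bits. */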
708 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
709 ppc_avr_t *a, ppc_avr_t *b, int record)
710 {
711 int i;
712 int all_in = 0;
713
714 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
715 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
716 if (le_rel == float_relation_unordered) {
717 r->u32[i] = 0xc0000000;
718 all_in = 1;
719 } else {
720 float32 bneg = float32_chs(b->f[i]);
721 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
722 int le = le_rel != float_relation_greater;
723 int ge = ge_rel != float_relation_less;
724
725 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
726 all_in |= (!le | !ge);
727 }
728 }
729 if (record) {
730 env->crf[6] = (all_in == 0) << 1;
731 }
732 }
733
734 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
735 {
736 vcmpbfp_internal(env, r, a, b, 0);
737 }
738
739 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
740 ppc_avr_t *b)
741 {
742 vcmpbfp_internal(env, r, a, b, 1);
743 }
744
745 #define VCT(suffix, satcvt, element) \
746 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
747 ppc_avr_t *b, uint32_t uim) \
748 { \
749 int i; \
750 int sat = 0; \
751 float_status s = env->vec_status; \
752 \
753 set_float_rounding_mode(float_round_to_zero, &s); \
754 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
755 if (float32_is_any_nan(b->f[i])) { \
756 r->element[i] = 0; \
757 } else { \
758 float64 t = float32_to_float64(b->f[i], &s); \
759 int64_t j; \
760 \
761 t = float64_scalbn(t, uim, &s); \
762 j = float64_to_int64(t, &s); \
763 r->element[i] = satcvt(j, &sat); \
764 } \
765 } \
766 if (sat) { \
767 env->vscr |= (1 << VSCR_SAT); \
768 } \
769 }
770 VCT(uxs, cvtsduw, u32)
771 VCT(sxs, cvtsdsw, s32)
772 #undef VCT
773
774 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
775 ppc_avr_t *b, ppc_avr_t *c)
776 {
777 int sat = 0;
778 int i;
779
780 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
781 int32_t prod = a->s16[i] * b->s16[i];
782 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
783
784 r->s16[i] = cvtswsh(t, &sat);
785 }
786
787 if (sat) {
788 env->vscr |= (1 << VSCR_SAT);
789 }
790 }
791
792 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
793 ppc_avr_t *b, ppc_avr_t *c)
794 {
795 int sat = 0;
796 int i;
797
798 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
799 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
800 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
801 r->s16[i] = cvtswsh(t, &sat);
802 }
803
804 if (sat) {
805 env->vscr |= (1 << VSCR_SAT);
806 }
807 }
808
809 #define VMINMAX_DO(name, compare, element) \
810 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
811 { \
812 int i; \
813 \
814 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
815 if (a->element[i] compare b->element[i]) { \
816 r->element[i] = b->element[i]; \
817 } else { \
818 r->element[i] = a->element[i]; \
819 } \
820 } \
821 }
822 #define VMINMAX(suffix, element) \
823 VMINMAX_DO(min##suffix, >, element) \
824 VMINMAX_DO(max##suffix, <, element)
825 VMINMAX(sb, s8)
826 VMINMAX(sh, s16)
827 VMINMAX(sw, s32)
828 VMINMAX(sd, s64)
829 VMINMAX(ub, u8)
830 VMINMAX(uh, u16)
831 VMINMAX(uw, u32)
832 VMINMAX(ud, u64)
833 #undef VMINMAX_DO
834 #undef VMINMAX
835
836 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
837 {
838 int i;
839
840 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
841 int32_t prod = a->s16[i] * b->s16[i];
842 r->s16[i] = (int16_t) (prod + c->s16[i]);
843 }
844 }
845
846 #define VMRG_DO(name, element, highp) \
847 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
848 { \
849 ppc_avr_t result; \
850 int i; \
851 size_t n_elems = ARRAY_SIZE(r->element); \
852 \
853 for (i = 0; i < n_elems / 2; i++) { \
854 if (highp) { \
855 result.element[i*2+HI_IDX] = a->element[i]; \
856 result.element[i*2+LO_IDX] = b->element[i]; \
857 } else { \
858 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
859 b->element[n_elems - i - 1]; \
860 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
861 a->element[n_elems - i - 1]; \
862 } \
863 } \
864 *r = result; \
865 }
866 #if defined(HOST_WORDS_BIGENDIAN)
867 #define MRGHI 0
868 #define MRGLO 1
869 #else
870 #define MRGHI 1
871 #define MRGLO 0
872 #endif
873 #define VMRG(suffix, element) \
874 VMRG_DO(mrgl##suffix, element, MRGHI) \
875 VMRG_DO(mrgh##suffix, element, MRGLO)
876 VMRG(b, u8)
877 VMRG(h, u16)
878 VMRG(w, u32)
879 #undef VMRG_DO
880 #undef VMRG
881 #undef MRGHI
882 #undef MRGLO
883
884 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
885 ppc_avr_t *b, ppc_avr_t *c)
886 {
887 int32_t prod[16];
888 int i;
889
890 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
891 prod[i] = (int32_t)a->s8[i] * b->u8[i];
892 }
893
894 VECTOR_FOR_INORDER_I(i, s32) {
895 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
896 prod[4 * i + 2] + prod[4 * i + 3];
897 }
898 }
899
900 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
901 ppc_avr_t *b, ppc_avr_t *c)
902 {
903 int32_t prod[8];
904 int i;
905
906 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
907 prod[i] = a->s16[i] * b->s16[i];
908 }
909
910 VECTOR_FOR_INORDER_I(i, s32) {
911 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
912 }
913 }
914
915 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
916 ppc_avr_t *b, ppc_avr_t *c)
917 {
918 int32_t prod[8];
919 int i;
920 int sat = 0;
921
922 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
923 prod[i] = (int32_t)a->s16[i] * b->s16[i];
924 }
925
926 VECTOR_FOR_INORDER_I(i, s32) {
927 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
928
929 r->u32[i] = cvtsdsw(t, &sat);
930 }
931
932 if (sat) {
933 env->vscr |= (1 << VSCR_SAT);
934 }
935 }
936
937 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
938 ppc_avr_t *b, ppc_avr_t *c)
939 {
940 uint16_t prod[16];
941 int i;
942
943 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
944 prod[i] = a->u8[i] * b->u8[i];
945 }
946
947 VECTOR_FOR_INORDER_I(i, u32) {
948 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
949 prod[4 * i + 2] + prod[4 * i + 3];
950 }
951 }
952
953 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
954 ppc_avr_t *b, ppc_avr_t *c)
955 {
956 uint32_t prod[8];
957 int i;
958
959 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
960 prod[i] = a->u16[i] * b->u16[i];
961 }
962
963 VECTOR_FOR_INORDER_I(i, u32) {
964 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
965 }
966 }
967
968 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
969 ppc_avr_t *b, ppc_avr_t *c)
970 {
971 uint32_t prod[8];
972 int i;
973 int sat = 0;
974
975 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
976 prod[i] = a->u16[i] * b->u16[i];
977 }
978
979 VECTOR_FOR_INORDER_I(i, s32) {
980 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
981
982 r->u32[i] = cvtuduw(t, &sat);
983 }
984
985 if (sat) {
986 env->vscr |= (1 << VSCR_SAT);
987 }
988 }
989
990 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
991 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
992 { \
993 int i; \
994 \
995 VECTOR_FOR_INORDER_I(i, prod_element) { \
996 if (evenp) { \
997 r->prod_element[i] = \
998 (cast)a->mul_element[i * 2 + HI_IDX] * \
999 (cast)b->mul_element[i * 2 + HI_IDX]; \
1000 } else { \
1001 r->prod_element[i] = \
1002 (cast)a->mul_element[i * 2 + LO_IDX] * \
1003 (cast)b->mul_element[i * 2 + LO_IDX]; \
1004 } \
1005 } \
1006 }
1007 #define VMUL(suffix, mul_element, prod_element, cast) \
1008 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1009 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1010 VMUL(sb, s8, s16, int16_t)
1011 VMUL(sh, s16, s32, int32_t)
1012 VMUL(sw, s32, s64, int64_t)
1013 VMUL(ub, u8, u16, uint16_t)
1014 VMUL(uh, u16, u32, uint32_t)
1015 VMUL(uw, u32, u64, uint64_t)
1016 #undef VMUL_DO
1017 #undef VMUL
1018
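/* vperm: Vector Permute.  Each result byte is selected from the 32-byte
 * concatenation of a and b by the low 5 bits of the corresponding byte
 * of c. */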
1019 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1020 ppc_avr_t *c)
1021 {
1022 ppc_avr_t result;
1023 int i;
1024
1025 VECTOR_FOR_INORDER_I(i, u8) {
1026 int s = c->u8[i] & 0x1f;
1027 #if defined(HOST_WORDS_BIGENDIAN)
1028 int index = s & 0xf;
1029 #else
1030 int index = 15 - (s & 0xf);
1031 #endif
1032
1033 if (s & 0x10) {
1034 result.u8[i] = b->u8[index];
1035 } else {
1036 result.u8[i] = a->u8[index];
1037 }
1038 }
1039 *r = result;
1040 }
1041
1042 #if defined(HOST_WORDS_BIGENDIAN)
1043 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1044 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1045 #else
1046 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1047 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1048 #endif
1049
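/* vbpermq: Vector Bit Permute Quadword.  Bit i of the 16-bit result is the
 * bit of vector a addressed by byte i of b (indexes 0..127); indexes of
 * 128 or more yield zero. */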
1050 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1051 {
1052 int i;
1053 uint64_t perm = 0;
1054
1055 VECTOR_FOR_INORDER_I(i, u8) {
1056 int index = VBPERMQ_INDEX(b, i);
1057
1058 if (index < 128) {
1059 uint64_t mask = (1ull << (63-(index & 0x3F)));
1060 if (a->u64[VBPERMQ_DW(index)] & mask) {
1061 perm |= (0x8000 >> i);
1062 }
1063 }
1064 }
1065
1066 r->u64[HI_IDX] = perm;
1067 r->u64[LO_IDX] = 0;
1068 }
1069
1070 #undef VBPERMQ_INDEX
1071 #undef VBPERMQ_DW
1072
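/* Lookup table for vgbbd (Vector Gather Bits by Bytes by Doubleword):
 * entry N spreads the 8 bits of N across the most-significant bit of each
 * byte of a 64-bit value. */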
1073 static const uint64_t VGBBD_MASKS[256] = {
1074 0x0000000000000000ull, /* 00 */
1075 0x0000000000000080ull, /* 01 */
1076 0x0000000000008000ull, /* 02 */
1077 0x0000000000008080ull, /* 03 */
1078 0x0000000000800000ull, /* 04 */
1079 0x0000000000800080ull, /* 05 */
1080 0x0000000000808000ull, /* 06 */
1081 0x0000000000808080ull, /* 07 */
1082 0x0000000080000000ull, /* 08 */
1083 0x0000000080000080ull, /* 09 */
1084 0x0000000080008000ull, /* 0A */
1085 0x0000000080008080ull, /* 0B */
1086 0x0000000080800000ull, /* 0C */
1087 0x0000000080800080ull, /* 0D */
1088 0x0000000080808000ull, /* 0E */
1089 0x0000000080808080ull, /* 0F */
1090 0x0000008000000000ull, /* 10 */
1091 0x0000008000000080ull, /* 11 */
1092 0x0000008000008000ull, /* 12 */
1093 0x0000008000008080ull, /* 13 */
1094 0x0000008000800000ull, /* 14 */
1095 0x0000008000800080ull, /* 15 */
1096 0x0000008000808000ull, /* 16 */
1097 0x0000008000808080ull, /* 17 */
1098 0x0000008080000000ull, /* 18 */
1099 0x0000008080000080ull, /* 19 */
1100 0x0000008080008000ull, /* 1A */
1101 0x0000008080008080ull, /* 1B */
1102 0x0000008080800000ull, /* 1C */
1103 0x0000008080800080ull, /* 1D */
1104 0x0000008080808000ull, /* 1E */
1105 0x0000008080808080ull, /* 1F */
1106 0x0000800000000000ull, /* 20 */
1107 0x0000800000000080ull, /* 21 */
1108 0x0000800000008000ull, /* 22 */
1109 0x0000800000008080ull, /* 23 */
1110 0x0000800000800000ull, /* 24 */
1111 0x0000800000800080ull, /* 25 */
1112 0x0000800000808000ull, /* 26 */
1113 0x0000800000808080ull, /* 27 */
1114 0x0000800080000000ull, /* 28 */
1115 0x0000800080000080ull, /* 29 */
1116 0x0000800080008000ull, /* 2A */
1117 0x0000800080008080ull, /* 2B */
1118 0x0000800080800000ull, /* 2C */
1119 0x0000800080800080ull, /* 2D */
1120 0x0000800080808000ull, /* 2E */
1121 0x0000800080808080ull, /* 2F */
1122 0x0000808000000000ull, /* 30 */
1123 0x0000808000000080ull, /* 31 */
1124 0x0000808000008000ull, /* 32 */
1125 0x0000808000008080ull, /* 33 */
1126 0x0000808000800000ull, /* 34 */
1127 0x0000808000800080ull, /* 35 */
1128 0x0000808000808000ull, /* 36 */
1129 0x0000808000808080ull, /* 37 */
1130 0x0000808080000000ull, /* 38 */
1131 0x0000808080000080ull, /* 39 */
1132 0x0000808080008000ull, /* 3A */
1133 0x0000808080008080ull, /* 3B */
1134 0x0000808080800000ull, /* 3C */
1135 0x0000808080800080ull, /* 3D */
1136 0x0000808080808000ull, /* 3E */
1137 0x0000808080808080ull, /* 3F */
1138 0x0080000000000000ull, /* 40 */
1139 0x0080000000000080ull, /* 41 */
1140 0x0080000000008000ull, /* 42 */
1141 0x0080000000008080ull, /* 43 */
1142 0x0080000000800000ull, /* 44 */
1143 0x0080000000800080ull, /* 45 */
1144 0x0080000000808000ull, /* 46 */
1145 0x0080000000808080ull, /* 47 */
1146 0x0080000080000000ull, /* 48 */
1147 0x0080000080000080ull, /* 49 */
1148 0x0080000080008000ull, /* 4A */
1149 0x0080000080008080ull, /* 4B */
1150 0x0080000080800000ull, /* 4C */
1151 0x0080000080800080ull, /* 4D */
1152 0x0080000080808000ull, /* 4E */
1153 0x0080000080808080ull, /* 4F */
1154 0x0080008000000000ull, /* 50 */
1155 0x0080008000000080ull, /* 51 */
1156 0x0080008000008000ull, /* 52 */
1157 0x0080008000008080ull, /* 53 */
1158 0x0080008000800000ull, /* 54 */
1159 0x0080008000800080ull, /* 55 */
1160 0x0080008000808000ull, /* 56 */
1161 0x0080008000808080ull, /* 57 */
1162 0x0080008080000000ull, /* 58 */
1163 0x0080008080000080ull, /* 59 */
1164 0x0080008080008000ull, /* 5A */
1165 0x0080008080008080ull, /* 5B */
1166 0x0080008080800000ull, /* 5C */
1167 0x0080008080800080ull, /* 5D */
1168 0x0080008080808000ull, /* 5E */
1169 0x0080008080808080ull, /* 5F */
1170 0x0080800000000000ull, /* 60 */
1171 0x0080800000000080ull, /* 61 */
1172 0x0080800000008000ull, /* 62 */
1173 0x0080800000008080ull, /* 63 */
1174 0x0080800000800000ull, /* 64 */
1175 0x0080800000800080ull, /* 65 */
1176 0x0080800000808000ull, /* 66 */
1177 0x0080800000808080ull, /* 67 */
1178 0x0080800080000000ull, /* 68 */
1179 0x0080800080000080ull, /* 69 */
1180 0x0080800080008000ull, /* 6A */
1181 0x0080800080008080ull, /* 6B */
1182 0x0080800080800000ull, /* 6C */
1183 0x0080800080800080ull, /* 6D */
1184 0x0080800080808000ull, /* 6E */
1185 0x0080800080808080ull, /* 6F */
1186 0x0080808000000000ull, /* 70 */
1187 0x0080808000000080ull, /* 71 */
1188 0x0080808000008000ull, /* 72 */
1189 0x0080808000008080ull, /* 73 */
1190 0x0080808000800000ull, /* 74 */
1191 0x0080808000800080ull, /* 75 */
1192 0x0080808000808000ull, /* 76 */
1193 0x0080808000808080ull, /* 77 */
1194 0x0080808080000000ull, /* 78 */
1195 0x0080808080000080ull, /* 79 */
1196 0x0080808080008000ull, /* 7A */
1197 0x0080808080008080ull, /* 7B */
1198 0x0080808080800000ull, /* 7C */
1199 0x0080808080800080ull, /* 7D */
1200 0x0080808080808000ull, /* 7E */
1201 0x0080808080808080ull, /* 7F */
1202 0x8000000000000000ull, /* 80 */
1203 0x8000000000000080ull, /* 81 */
1204 0x8000000000008000ull, /* 82 */
1205 0x8000000000008080ull, /* 83 */
1206 0x8000000000800000ull, /* 84 */
1207 0x8000000000800080ull, /* 85 */
1208 0x8000000000808000ull, /* 86 */
1209 0x8000000000808080ull, /* 87 */
1210 0x8000000080000000ull, /* 88 */
1211 0x8000000080000080ull, /* 89 */
1212 0x8000000080008000ull, /* 8A */
1213 0x8000000080008080ull, /* 8B */
1214 0x8000000080800000ull, /* 8C */
1215 0x8000000080800080ull, /* 8D */
1216 0x8000000080808000ull, /* 8E */
1217 0x8000000080808080ull, /* 8F */
1218 0x8000008000000000ull, /* 90 */
1219 0x8000008000000080ull, /* 91 */
1220 0x8000008000008000ull, /* 92 */
1221 0x8000008000008080ull, /* 93 */
1222 0x8000008000800000ull, /* 94 */
1223 0x8000008000800080ull, /* 95 */
1224 0x8000008000808000ull, /* 96 */
1225 0x8000008000808080ull, /* 97 */
1226 0x8000008080000000ull, /* 98 */
1227 0x8000008080000080ull, /* 99 */
1228 0x8000008080008000ull, /* 9A */
1229 0x8000008080008080ull, /* 9B */
1230 0x8000008080800000ull, /* 9C */
1231 0x8000008080800080ull, /* 9D */
1232 0x8000008080808000ull, /* 9E */
1233 0x8000008080808080ull, /* 9F */
1234 0x8000800000000000ull, /* A0 */
1235 0x8000800000000080ull, /* A1 */
1236 0x8000800000008000ull, /* A2 */
1237 0x8000800000008080ull, /* A3 */
1238 0x8000800000800000ull, /* A4 */
1239 0x8000800000800080ull, /* A5 */
1240 0x8000800000808000ull, /* A6 */
1241 0x8000800000808080ull, /* A7 */
1242 0x8000800080000000ull, /* A8 */
1243 0x8000800080000080ull, /* A9 */
1244 0x8000800080008000ull, /* AA */
1245 0x8000800080008080ull, /* AB */
1246 0x8000800080800000ull, /* AC */
1247 0x8000800080800080ull, /* AD */
1248 0x8000800080808000ull, /* AE */
1249 0x8000800080808080ull, /* AF */
1250 0x8000808000000000ull, /* B0 */
1251 0x8000808000000080ull, /* B1 */
1252 0x8000808000008000ull, /* B2 */
1253 0x8000808000008080ull, /* B3 */
1254 0x8000808000800000ull, /* B4 */
1255 0x8000808000800080ull, /* B5 */
1256 0x8000808000808000ull, /* B6 */
1257 0x8000808000808080ull, /* B7 */
1258 0x8000808080000000ull, /* B8 */
1259 0x8000808080000080ull, /* B9 */
1260 0x8000808080008000ull, /* BA */
1261 0x8000808080008080ull, /* BB */
1262 0x8000808080800000ull, /* BC */
1263 0x8000808080800080ull, /* BD */
1264 0x8000808080808000ull, /* BE */
1265 0x8000808080808080ull, /* BF */
1266 0x8080000000000000ull, /* C0 */
1267 0x8080000000000080ull, /* C1 */
1268 0x8080000000008000ull, /* C2 */
1269 0x8080000000008080ull, /* C3 */
1270 0x8080000000800000ull, /* C4 */
1271 0x8080000000800080ull, /* C5 */
1272 0x8080000000808000ull, /* C6 */
1273 0x8080000000808080ull, /* C7 */
1274 0x8080000080000000ull, /* C8 */
1275 0x8080000080000080ull, /* C9 */
1276 0x8080000080008000ull, /* CA */
1277 0x8080000080008080ull, /* CB */
1278 0x8080000080800000ull, /* CC */
1279 0x8080000080800080ull, /* CD */
1280 0x8080000080808000ull, /* CE */
1281 0x8080000080808080ull, /* CF */
1282 0x8080008000000000ull, /* D0 */
1283 0x8080008000000080ull, /* D1 */
1284 0x8080008000008000ull, /* D2 */
1285 0x8080008000008080ull, /* D3 */
1286 0x8080008000800000ull, /* D4 */
1287 0x8080008000800080ull, /* D5 */
1288 0x8080008000808000ull, /* D6 */
1289 0x8080008000808080ull, /* D7 */
1290 0x8080008080000000ull, /* D8 */
1291 0x8080008080000080ull, /* D9 */
1292 0x8080008080008000ull, /* DA */
1293 0x8080008080008080ull, /* DB */
1294 0x8080008080800000ull, /* DC */
1295 0x8080008080800080ull, /* DD */
1296 0x8080008080808000ull, /* DE */
1297 0x8080008080808080ull, /* DF */
1298 0x8080800000000000ull, /* E0 */
1299 0x8080800000000080ull, /* E1 */
1300 0x8080800000008000ull, /* E2 */
1301 0x8080800000008080ull, /* E3 */
1302 0x8080800000800000ull, /* E4 */
1303 0x8080800000800080ull, /* E5 */
1304 0x8080800000808000ull, /* E6 */
1305 0x8080800000808080ull, /* E7 */
1306 0x8080800080000000ull, /* E8 */
1307 0x8080800080000080ull, /* E9 */
1308 0x8080800080008000ull, /* EA */
1309 0x8080800080008080ull, /* EB */
1310 0x8080800080800000ull, /* EC */
1311 0x8080800080800080ull, /* ED */
1312 0x8080800080808000ull, /* EE */
1313 0x8080800080808080ull, /* EF */
1314 0x8080808000000000ull, /* F0 */
1315 0x8080808000000080ull, /* F1 */
1316 0x8080808000008000ull, /* F2 */
1317 0x8080808000008080ull, /* F3 */
1318 0x8080808000800000ull, /* F4 */
1319 0x8080808000800080ull, /* F5 */
1320 0x8080808000808000ull, /* F6 */
1321 0x8080808000808080ull, /* F7 */
1322 0x8080808080000000ull, /* F8 */
1323 0x8080808080000080ull, /* F9 */
1324 0x8080808080008000ull, /* FA */
1325 0x8080808080008080ull, /* FB */
1326 0x8080808080800000ull, /* FC */
1327 0x8080808080800080ull, /* FD */
1328 0x8080808080808000ull, /* FE */
1329 0x8080808080808080ull, /* FF */
1330 };
1331
1332 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1333 {
1334 int i;
1335 uint64_t t[2] = { 0, 0 };
1336
1337 VECTOR_FOR_INORDER_I(i, u8) {
1338 #if defined(HOST_WORDS_BIGENDIAN)
1339 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1340 #else
1341 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1342 #endif
1343 }
1344
1345 r->u64[0] = t[0];
1346 r->u64[1] = t[1];
1347 }
1348
1349 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1350 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1351 { \
1352 int i, j; \
1353 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1354 \
1355 VECTOR_FOR_INORDER_I(i, srcfld) { \
1356 prod[i] = 0; \
1357 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1358 if (a->srcfld[i] & (1ull<<j)) { \
1359 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1360 } \
1361 } \
1362 } \
1363 \
1364 VECTOR_FOR_INORDER_I(i, trgfld) { \
1365 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1366 } \
1367 }
1368
1369 PMSUM(vpmsumb, u8, u16, uint16_t)
1370 PMSUM(vpmsumh, u16, u32, uint32_t)
1371 PMSUM(vpmsumw, u32, u64, uint64_t)
1372
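/* vpmsumd: carry-less (polynomial) multiplication of each doubleword pair,
 * XOR-summed into one 128-bit result.  Uses __uint128_t when the host
 * compiler provides it, otherwise a manual shift-and-xor loop. */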
1373 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1374 {
1375
1376 #ifdef CONFIG_INT128
1377 int i, j;
1378 __uint128_t prod[2];
1379
1380 VECTOR_FOR_INORDER_I(i, u64) {
1381 prod[i] = 0;
1382 for (j = 0; j < 64; j++) {
1383 if (a->u64[i] & (1ull<<j)) {
1384 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1385 }
1386 }
1387 }
1388
1389 r->u128 = prod[0] ^ prod[1];
1390
1391 #else
1392 int i, j;
1393 ppc_avr_t prod[2];
1394
1395 VECTOR_FOR_INORDER_I(i, u64) {
1396 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1397 for (j = 0; j < 64; j++) {
1398 if (a->u64[i] & (1ull<<j)) {
1399 ppc_avr_t bshift;
1400 if (j == 0) {
1401 bshift.u64[HI_IDX] = 0;
1402 bshift.u64[LO_IDX] = b->u64[i];
1403 } else {
1404 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1405 bshift.u64[LO_IDX] = b->u64[i] << j;
1406 }
1407 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1408 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1409 }
1410 }
1411 }
1412
1413 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1414 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1415 #endif
1416 }
1417
1418
1419 #if defined(HOST_WORDS_BIGENDIAN)
1420 #define PKBIG 1
1421 #else
1422 #define PKBIG 0
1423 #endif
1424 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1425 {
1426 int i, j;
1427 ppc_avr_t result;
1428 #if defined(HOST_WORDS_BIGENDIAN)
1429 const ppc_avr_t *x[2] = { a, b };
1430 #else
1431 const ppc_avr_t *x[2] = { b, a };
1432 #endif
1433
1434 VECTOR_FOR_INORDER_I(i, u64) {
1435 VECTOR_FOR_INORDER_I(j, u32) {
1436 uint32_t e = x[i]->u32[j];
1437
1438 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1439 ((e >> 6) & 0x3e0) |
1440 ((e >> 3) & 0x1f));
1441 }
1442 }
1443 *r = result;
1444 }
1445
1446 #define VPK(suffix, from, to, cvt, dosat) \
1447 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1448 ppc_avr_t *a, ppc_avr_t *b) \
1449 { \
1450 int i; \
1451 int sat = 0; \
1452 ppc_avr_t result; \
1453 ppc_avr_t *a0 = PKBIG ? a : b; \
1454 ppc_avr_t *a1 = PKBIG ? b : a; \
1455 \
1456 VECTOR_FOR_INORDER_I(i, from) { \
1457 result.to[i] = cvt(a0->from[i], &sat); \
1458 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1459 } \
1460 *r = result; \
1461 if (dosat && sat) { \
1462 env->vscr |= (1 << VSCR_SAT); \
1463 } \
1464 }
1465 #define I(x, y) (x)
1466 VPK(shss, s16, s8, cvtshsb, 1)
1467 VPK(shus, s16, u8, cvtshub, 1)
1468 VPK(swss, s32, s16, cvtswsh, 1)
1469 VPK(swus, s32, u16, cvtswuh, 1)
1470 VPK(sdss, s64, s32, cvtsdsw, 1)
1471 VPK(sdus, s64, u32, cvtsduw, 1)
1472 VPK(uhus, u16, u8, cvtuhub, 1)
1473 VPK(uwus, u32, u16, cvtuwuh, 1)
1474 VPK(udus, u64, u32, cvtuduw, 1)
1475 VPK(uhum, u16, u8, I, 0)
1476 VPK(uwum, u32, u16, I, 0)
1477 VPK(udum, u64, u32, I, 0)
1478 #undef I
1479 #undef VPK
1480 #undef PKBIG
1481
1482 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1483 {
1484 int i;
1485
1486 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1487 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1488 }
1489 }
1490
1491 #define VRFI(suffix, rounding) \
1492 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1493 ppc_avr_t *b) \
1494 { \
1495 int i; \
1496 float_status s = env->vec_status; \
1497 \
1498 set_float_rounding_mode(rounding, &s); \
1499 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1500 r->f[i] = float32_round_to_int (b->f[i], &s); \
1501 } \
1502 }
1503 VRFI(n, float_round_nearest_even)
1504 VRFI(m, float_round_down)
1505 VRFI(p, float_round_up)
1506 VRFI(z, float_round_to_zero)
1507 #undef VRFI
1508
1509 #define VROTATE(suffix, element, mask) \
1510 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1511 { \
1512 int i; \
1513 \
1514 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1515 unsigned int shift = b->element[i] & mask; \
1516 r->element[i] = (a->element[i] << shift) | \
1517 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1518 } \
1519 }
1520 VROTATE(b, u8, 0x7)
1521 VROTATE(h, u16, 0xF)
1522 VROTATE(w, u32, 0x1F)
1523 VROTATE(d, u64, 0x3F)
1524 #undef VROTATE
1525
1526 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1527 {
1528 int i;
1529
1530 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1531 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1532
1533 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1534 }
1535 }
1536
1537 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1538 ppc_avr_t *c)
1539 {
1540 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1541 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1542 }
1543
1544 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1545 {
1546 int i;
1547
1548 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1549 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1550 }
1551 }
1552
1553 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1554 {
1555 int i;
1556
1557 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1558 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1559 }
1560 }
1561
1562 /* The specification says that the results are undefined if all of the
1563  * shift counts are not identical.  We check that they are all identical,
1564  * to conform to what real hardware appears to do. */
1565 #define VSHIFT(suffix, leftp) \
1566 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1567 { \
1568 int shift = b->u8[LO_IDX*15] & 0x7; \
1569 int doit = 1; \
1570 int i; \
1571 \
1572 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1573 doit = doit && ((b->u8[i] & 0x7) == shift); \
1574 } \
1575 if (doit) { \
1576 if (shift == 0) { \
1577 *r = *a; \
1578 } else if (leftp) { \
1579 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1580 \
1581 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1582 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1583 } else { \
1584 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1585 \
1586 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1587 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1588 } \
1589 } \
1590 }
1591 VSHIFT(l, 1)
1592 VSHIFT(r, 0)
1593 #undef VSHIFT
1594
1595 #define VSL(suffix, element, mask) \
1596 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1597 { \
1598 int i; \
1599 \
1600 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1601 unsigned int shift = b->element[i] & mask; \
1602 \
1603 r->element[i] = a->element[i] << shift; \
1604 } \
1605 }
1606 VSL(b, u8, 0x7)
1607 VSL(h, u16, 0x0F)
1608 VSL(w, u32, 0x1F)
1609 VSL(d, u64, 0x3F)
1610 #undef VSL
1611
1612 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1613 {
1614 int sh = shift & 0xf;
1615 int i;
1616 ppc_avr_t result;
1617
1618 #if defined(HOST_WORDS_BIGENDIAN)
1619 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1620 int index = sh + i;
1621 if (index > 0xf) {
1622 result.u8[i] = b->u8[index - 0x10];
1623 } else {
1624 result.u8[i] = a->u8[index];
1625 }
1626 }
1627 #else
1628 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1629 int index = (16 - sh) + i;
1630 if (index > 0xf) {
1631 result.u8[i] = a->u8[index - 0x10];
1632 } else {
1633 result.u8[i] = b->u8[index];
1634 }
1635 }
1636 #endif
1637 *r = result;
1638 }
1639
1640 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1641 {
1642 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1643
1644 #if defined(HOST_WORDS_BIGENDIAN)
1645 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1646 memset(&r->u8[16-sh], 0, sh);
1647 #else
1648 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1649 memset(&r->u8[0], 0, sh);
1650 #endif
1651 }
1652
1653 /* Experimental testing shows that hardware masks the immediate. */
1654 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1655 #if defined(HOST_WORDS_BIGENDIAN)
1656 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1657 #else
1658 #define SPLAT_ELEMENT(element) \
1659 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1660 #endif
1661 #define VSPLT(suffix, element) \
1662 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1663 { \
1664 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1665 int i; \
1666 \
1667 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1668 r->element[i] = s; \
1669 } \
1670 }
1671 VSPLT(b, u8)
1672 VSPLT(h, u16)
1673 VSPLT(w, u32)
1674 #undef VSPLT
1675 #undef SPLAT_ELEMENT
1676 #undef _SPLAT_MASKED
1677
1678 #define VSPLTI(suffix, element, splat_type) \
1679 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1680 { \
1681 splat_type x = (int8_t)(splat << 3) >> 3; \
1682 int i; \
1683 \
1684 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1685 r->element[i] = x; \
1686 } \
1687 }
1688 VSPLTI(b, s8, int8_t)
1689 VSPLTI(h, s16, int16_t)
1690 VSPLTI(w, s32, int32_t)
1691 #undef VSPLTI
1692
1693 #define VSR(suffix, element, mask) \
1694 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1695 { \
1696 int i; \
1697 \
1698 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1699 unsigned int shift = b->element[i] & mask; \
1700 r->element[i] = a->element[i] >> shift; \
1701 } \
1702 }
1703 VSR(ab, s8, 0x7)
1704 VSR(ah, s16, 0xF)
1705 VSR(aw, s32, 0x1F)
1706 VSR(ad, s64, 0x3F)
1707 VSR(b, u8, 0x7)
1708 VSR(h, u16, 0xF)
1709 VSR(w, u32, 0x1F)
1710 VSR(d, u64, 0x3F)
1711 #undef VSR
1712
1713 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1714 {
1715 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1716
1717 #if defined(HOST_WORDS_BIGENDIAN)
1718 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1719 memset(&r->u8[0], 0, sh);
1720 #else
1721 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1722 memset(&r->u8[16 - sh], 0, sh);
1723 #endif
1724 }
1725
1726 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1727 {
1728 int i;
1729
1730 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1731 r->u32[i] = a->u32[i] >= b->u32[i];
1732 }
1733 }
1734
1735 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1736 {
1737 int64_t t;
1738 int i, upper;
1739 ppc_avr_t result;
1740 int sat = 0;
1741
1742 #if defined(HOST_WORDS_BIGENDIAN)
1743 upper = ARRAY_SIZE(r->s32)-1;
1744 #else
1745 upper = 0;
1746 #endif
1747 t = (int64_t)b->s32[upper];
1748 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1749 t += a->s32[i];
1750 result.s32[i] = 0;
1751 }
1752 result.s32[upper] = cvtsdsw(t, &sat);
1753 *r = result;
1754
1755 if (sat) {
1756 env->vscr |= (1 << VSCR_SAT);
1757 }
1758 }
1759
1760 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1761 {
1762 int i, j, upper;
1763 ppc_avr_t result;
1764 int sat = 0;
1765
1766 #if defined(HOST_WORDS_BIGENDIAN)
1767 upper = 1;
1768 #else
1769 upper = 0;
1770 #endif
1771 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1772 int64_t t = (int64_t)b->s32[upper + i * 2];
1773
1774 result.u64[i] = 0;
1775 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1776 t += a->s32[2 * i + j];
1777 }
1778 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1779 }
1780
1781 *r = result;
1782 if (sat) {
1783 env->vscr |= (1 << VSCR_SAT);
1784 }
1785 }
1786
1787 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1788 {
1789 int i, j;
1790 int sat = 0;
1791
1792 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1793 int64_t t = (int64_t)b->s32[i];
1794
1795 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1796 t += a->s8[4 * i + j];
1797 }
1798 r->s32[i] = cvtsdsw(t, &sat);
1799 }
1800
1801 if (sat) {
1802 env->vscr |= (1 << VSCR_SAT);
1803 }
1804 }
1805
1806 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1807 {
1808 int sat = 0;
1809 int i;
1810
1811 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1812 int64_t t = (int64_t)b->s32[i];
1813
1814 t += a->s16[2 * i] + a->s16[2 * i + 1];
1815 r->s32[i] = cvtsdsw(t, &sat);
1816 }
1817
1818 if (sat) {
1819 env->vscr |= (1 << VSCR_SAT);
1820 }
1821 }
1822
1823 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1824 {
1825 int i, j;
1826 int sat = 0;
1827
1828 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1829 uint64_t t = (uint64_t)b->u32[i];
1830
1831 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1832 t += a->u8[4 * i + j];
1833 }
1834 r->u32[i] = cvtuduw(t, &sat);
1835 }
1836
1837 if (sat) {
1838 env->vscr |= (1 << VSCR_SAT);
1839 }
1840 }
1841
1842 #if defined(HOST_WORDS_BIGENDIAN)
1843 #define UPKHI 1
1844 #define UPKLO 0
1845 #else
1846 #define UPKHI 0
1847 #define UPKLO 1
1848 #endif
1849 #define VUPKPX(suffix, hi) \
1850 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1851 { \
1852 int i; \
1853 ppc_avr_t result; \
1854 \
1855 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1856 uint16_t e = b->u16[hi ? i : i+4]; \
1857 uint8_t a = (e >> 15) ? 0xff : 0; \
1858 uint8_t r = (e >> 10) & 0x1f; \
1859 uint8_t g = (e >> 5) & 0x1f; \
1860 uint8_t b = e & 0x1f; \
1861 \
1862 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1863 } \
1864 *r = result; \
1865 }
1866 VUPKPX(lpx, UPKLO)
1867 VUPKPX(hpx, UPKHI)
1868 #undef VUPKPX
1869
1870 #define VUPK(suffix, unpacked, packee, hi) \
1871 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1872 { \
1873 int i; \
1874 ppc_avr_t result; \
1875 \
1876 if (hi) { \
1877 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1878 result.unpacked[i] = b->packee[i]; \
1879 } \
1880 } else { \
1881 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1882 i++) { \
1883 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1884 } \
1885 } \
1886 *r = result; \
1887 }
1888 VUPK(hsb, s16, s8, UPKHI)
1889 VUPK(hsh, s32, s16, UPKHI)
1890 VUPK(hsw, s64, s32, UPKHI)
1891 VUPK(lsb, s16, s8, UPKLO)
1892 VUPK(lsh, s32, s16, UPKLO)
1893 VUPK(lsw, s64, s32, UPKLO)
1894 #undef VUPK
1895 #undef UPKHI
1896 #undef UPKLO
1897
1898 #define VGENERIC_DO(name, element) \
1899 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1900 { \
1901 int i; \
1902 \
1903 VECTOR_FOR_INORDER_I(i, element) { \
1904 r->element[i] = name(b->element[i]); \
1905 } \
1906 }
1907
1908 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1909 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1910 #define clzw(v) clz32((v))
1911 #define clzd(v) clz64((v))
1912
1913 VGENERIC_DO(clzb, u8)
1914 VGENERIC_DO(clzh, u16)
1915 VGENERIC_DO(clzw, u32)
1916 VGENERIC_DO(clzd, u64)
1917
1918 #undef clzb
1919 #undef clzh
1920 #undef clzw
1921 #undef clzd
1922
1923 #define popcntb(v) ctpop8(v)
1924 #define popcnth(v) ctpop16(v)
1925 #define popcntw(v) ctpop32(v)
1926 #define popcntd(v) ctpop64(v)
1927
1928 VGENERIC_DO(popcntb, u8)
1929 VGENERIC_DO(popcnth, u16)
1930 VGENERIC_DO(popcntw, u32)
1931 VGENERIC_DO(popcntd, u64)
1932
1933 #undef popcntb
1934 #undef popcnth
1935 #undef popcntw
1936 #undef popcntd
1937
1938 #undef VGENERIC_DO
1939
1940 #if defined(HOST_WORDS_BIGENDIAN)
1941 #define QW_ONE { .u64 = { 0, 1 } }
1942 #else
1943 #define QW_ONE { .u64 = { 1, 0 } }
1944 #endif
1945
1946 #ifndef CONFIG_INT128
1947
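/* 128-bit arithmetic primitives used when the host compiler provides no
 * native __int128 support. */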
1948 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1949 {
1950 t->u64[0] = ~a.u64[0];
1951 t->u64[1] = ~a.u64[1];
1952 }
1953
1954 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1955 {
1956 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1957 return -1;
1958 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1959 return 1;
1960 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1961 return -1;
1962 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1963 return 1;
1964 } else {
1965 return 0;
1966 }
1967 }
1968
1969 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1970 {
1971 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1972 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1973 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1974 }
1975
1976 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1977 {
1978 ppc_avr_t not_a;
1979 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1980 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1981 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1982 avr_qw_not(&not_a, a);
1983 return avr_qw_cmpu(not_a, b) < 0;
1984 }
1985
1986 #endif
1987
1988 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1989 {
1990 #ifdef CONFIG_INT128
1991 r->u128 = a->u128 + b->u128;
1992 #else
1993 avr_qw_add(r, *a, *b);
1994 #endif
1995 }
1996
1997 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1998 {
1999 #ifdef CONFIG_INT128
2000 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2001 #else
2002
2003 if (c->u64[LO_IDX] & 1) {
2004 ppc_avr_t tmp;
2005
2006 tmp.u64[HI_IDX] = 0;
2007 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2008 avr_qw_add(&tmp, *a, tmp);
2009 avr_qw_add(r, tmp, *b);
2010 } else {
2011 avr_qw_add(r, *a, *b);
2012 }
2013 #endif
2014 }
2015
2016 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2017 {
2018 #ifdef CONFIG_INT128
2019 r->u128 = (~a->u128 < b->u128);
2020 #else
2021 ppc_avr_t not_a;
2022
2023 avr_qw_not(&not_a, *a);
2024
2025 r->u64[HI_IDX] = 0;
2026 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2027 #endif
2028 }
2029
2030 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2031 {
2032 #ifdef CONFIG_INT128
2033 int carry_out = (~a->u128 < b->u128);
2034 if (!carry_out && (c->u128 & 1)) {
2035 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2036 ((a->u128 != 0) || (b->u128 != 0));
2037 }
2038 r->u128 = carry_out;
2039 #else
2040
2041 int carry_in = c->u64[LO_IDX] & 1;
2042 int carry_out = 0;
2043 ppc_avr_t tmp;
2044
2045 carry_out = avr_qw_addc(&tmp, *a, *b);
2046
2047 if (!carry_out && carry_in) {
2048 ppc_avr_t one = QW_ONE;
2049 carry_out = avr_qw_addc(&tmp, tmp, one);
2050 }
2051 r->u64[HI_IDX] = 0;
2052 r->u64[LO_IDX] = carry_out;
2053 #endif
2054 }
2055
2056 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2057 {
2058 #ifdef CONFIG_INT128
2059 r->u128 = a->u128 - b->u128;
2060 #else
2061 ppc_avr_t tmp;
2062 ppc_avr_t one = QW_ONE;
2063
2064 avr_qw_not(&tmp, *b);
2065 avr_qw_add(&tmp, *a, tmp);
2066 avr_qw_add(r, tmp, one);
2067 #endif
2068 }
2069
2070 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2071 {
2072 #ifdef CONFIG_INT128
2073 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2074 #else
2075 ppc_avr_t tmp, sum;
2076
2077 avr_qw_not(&tmp, *b);
2078 avr_qw_add(&sum, *a, tmp);
2079
2080 tmp.u64[HI_IDX] = 0;
2081 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2082 avr_qw_add(r, sum, tmp);
2083 #endif
2084 }
2085
2086 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2087 {
2088 #ifdef CONFIG_INT128
2089 r->u128 = (~a->u128 < ~b->u128) ||
2090 (a->u128 + ~b->u128 == (__uint128_t)-1);
2091 #else
2092 int carry = (avr_qw_cmpu(*a, *b) > 0);
2093 if (!carry) {
2094 ppc_avr_t tmp;
2095 avr_qw_not(&tmp, *b);
2096 avr_qw_add(&tmp, *a, tmp);
2097 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2098 }
2099 r->u64[HI_IDX] = 0;
2100 r->u64[LO_IDX] = carry;
2101 #endif
2102 }
2103
2104 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2105 {
2106 #ifdef CONFIG_INT128
2107 r->u128 =
2108 (~a->u128 < ~b->u128) ||
2109 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2110 #else
2111 int carry_in = c->u64[LO_IDX] & 1;
2112 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2113 if (!carry_out && carry_in) {
2114 ppc_avr_t tmp;
2115 avr_qw_not(&tmp, *b);
2116 avr_qw_add(&tmp, *a, tmp);
2117 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2118 }
2119
2120 r->u64[HI_IDX] = 0;
2121 r->u64[LO_IDX] = carry_out;
2122 #endif
2123 }
2124
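/* Sign nibble codes for signed packed decimal: 0xA, 0xC, 0xE and 0xF mean
 * plus, 0xB and 0xD mean minus.  bcdadd/bcdsub generate 0xC (or 0xF when
 * PS = 1) for positive results and 0xD for negative ones. */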
2125 #define BCD_PLUS_PREF_1 0xC
2126 #define BCD_PLUS_PREF_2 0xF
2127 #define BCD_PLUS_ALT_1 0xA
2128 #define BCD_NEG_PREF 0xD
2129 #define BCD_NEG_ALT 0xB
2130 #define BCD_PLUS_ALT_2 0xE
2131
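/* Byte of the vector that holds BCD digit n, where digit 0 is the sign
 * nibble and digit 31 the most significant digit; even digits occupy the
 * low nibble of their byte, odd digits the high nibble. */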
2132 #if defined(HOST_WORDS_BIGENDIAN)
2133 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2134 #else
2135 #define BCD_DIG_BYTE(n) ((n) / 2)
2136 #endif
2137
2138 static int bcd_get_sgn(ppc_avr_t *bcd)
2139 {
2140 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2141 case BCD_PLUS_PREF_1:
2142 case BCD_PLUS_PREF_2:
2143 case BCD_PLUS_ALT_1:
2144 case BCD_PLUS_ALT_2:
2145 {
2146 return 1;
2147 }
2148
2149 case BCD_NEG_PREF:
2150 case BCD_NEG_ALT:
2151 {
2152 return -1;
2153 }
2154
2155 default:
2156 {
2157 return 0;
2158 }
2159 }
2160 }
2161
2162 static int bcd_preferred_sgn(int sgn, int ps)
2163 {
2164 if (sgn >= 0) {
2165 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2166 } else {
2167 return BCD_NEG_PREF;
2168 }
2169 }
2170
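/* Extract BCD digit n; a nibble greater than 9 marks the operand invalid. */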
2171 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2172 {
2173 uint8_t result;
2174 if (n & 1) {
2175 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2176 } else {
2177 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2178 }
2179
2180 if (unlikely(result > 9)) {
2181 *invalid = true;
2182 }
2183 return result;
2184 }
2185
2186 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2187 {
2188 if (n & 1) {
2189 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2190 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2191 } else {
2192 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2193 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2194 }
2195 }
2196
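/* Compare magnitudes digit by digit from the most significant digit down;
 * returns 1, -1 or 0 (0 is also returned once an invalid digit is seen). */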
2197 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2198 {
2199 int i;
2200 int invalid = 0;
2201 for (i = 31; i > 0; i--) {
2202 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2203 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2204 if (unlikely(invalid)) {
2205 return 0; /* doesn't matter */
2206 } else if (dig_a > dig_b) {
2207 return 1;
2208 } else if (dig_a < dig_b) {
2209 return -1;
2210 }
2211 }
2212
2213 return 0;
2214 }
2215
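/* Schoolbook decimal addition of the magnitudes of a and b.  *overflow
 * receives the final carry; the return value is 1 if the result is zero,
 * 0 otherwise, or -1 when an invalid digit is encountered. */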
2216 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2217 int *overflow)
2218 {
2219 int carry = 0;
2220 int i;
2221 int is_zero = 1;
2222 for (i = 1; i <= 31; i++) {
2223 uint8_t digit = bcd_get_digit(a, i, invalid) +
2224 bcd_get_digit(b, i, invalid) + carry;
2225 is_zero &= (digit == 0);
2226 if (digit > 9) {
2227 carry = 1;
2228 digit -= 10;
2229 } else {
2230 carry = 0;
2231 }
2232
2233 bcd_put_digit(t, digit, i);
2234
2235 if (unlikely(*invalid)) {
2236 return -1;
2237 }
2238 }
2239
2240 *overflow = carry;
2241 return is_zero;
2242 }
2243
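/* Decimal subtraction of magnitudes.  Callers pass the larger magnitude
 * first, so the final borrow left in *overflow is normally zero. */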
2244 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2245 int *overflow)
2246 {
2247 int carry = 0;
2248 int i;
2249 int is_zero = 1;
2250 for (i = 1; i <= 31; i++) {
2251 uint8_t digit = bcd_get_digit(a, i, invalid) -
2252 bcd_get_digit(b, i, invalid) + carry;
2253 is_zero &= (digit == 0);
2254 if (digit & 0x80) {
2255 carry = -1;
2256 digit += 10;
2257 } else {
2258 carry = 0;
2259 }
2260
2261 bcd_put_digit(t, digit, i);
2262
2263 if (unlikely(*invalid)) {
2264 return -1;
2265 }
2266 }
2267
2268 *overflow = carry;
2269 return is_zero;
2270 }
2271
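/* bcdadd: signed packed-decimal addition.  Equal signs add the magnitudes,
 * differing signs subtract the smaller from the larger.  The returned CR
 * field encodes LT/GT/EQ for the sign of the result and SO on overflow or
 * invalid input. */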
2272 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2273 {
2274
2275 int sgna = bcd_get_sgn(a);
2276 int sgnb = bcd_get_sgn(b);
2277 int invalid = (sgna == 0) || (sgnb == 0);
2278 int overflow = 0;
2279 int zero = 0;
2280 uint32_t cr = 0;
2281 ppc_avr_t result = { .u64 = { 0, 0 } };
2282
2283 if (!invalid) {
2284 if (sgna == sgnb) {
2285 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2286 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2287 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2288 } else if (bcd_cmp_mag(a, b) > 0) {
2289 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2290 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2291 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2292 } else {
2293 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2294 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2295 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2296 }
2297 }
2298
2299 if (unlikely(invalid)) {
2300 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2301 cr = 1 << CRF_SO;
2302 } else if (overflow) {
2303 cr |= 1 << CRF_SO;
2304 } else if (zero) {
2305 cr = 1 << CRF_EQ;
2306 }
2307
2308 *r = result;
2309
2310 return cr;
2311 }
2312
2313 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2314 {
2315 ppc_avr_t bcopy = *b;
2316 int sgnb = bcd_get_sgn(b);
2317 if (sgnb < 0) {
2318 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2319 } else if (sgnb > 0) {
2320 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2321 }
2322 /* else invalid ... defer to bcdadd code for proper handling */
2323
2324 return helper_bcdadd(r, a, &bcopy, ps);
2325 }
2326
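/* vsbox: apply the AES S-box (SubBytes) to each byte of a. */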
2327 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2328 {
2329 int i;
2330 VECTOR_FOR_INORDER_I(i, u8) {
2331 r->u8[i] = AES_sbox[a->u8[i]];
2332 }
2333 }
2334
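/* vcipher: one full AES encryption round.  The Te tables combine SubBytes
 * and MixColumns, AES_shifts provides the ShiftRows permutation and b is
 * the round key; vcipherlast below omits MixColumns for the final round. */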
2335 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2336 {
2337 ppc_avr_t result;
2338 int i;
2339
2340 VECTOR_FOR_INORDER_I(i, u32) {
2341 result.AVRW(i) = b->AVRW(i) ^
2342 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2343 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2344 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2345 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2346 }
2347 *r = result;
2348 }
2349
2350 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2351 {
2352 ppc_avr_t result;
2353 int i;
2354
2355 VECTOR_FOR_INORDER_I(i, u8) {
2356 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2357 }
2358 *r = result;
2359 }
2360
2361 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2362 {
2363 /* This differs from what is written in ISA V2.07. The RTL is */
2364 /* incorrect and will be fixed in V2.07B. */
2365 int i;
2366 ppc_avr_t tmp;
2367
2368 VECTOR_FOR_INORDER_I(i, u8) {
2369 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2370 }
2371
2372 VECTOR_FOR_INORDER_I(i, u32) {
2373 r->AVRW(i) =
2374 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2375 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2376 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2377 AES_imc[tmp.AVRB(4*i + 3)][3];
2378 }
2379 }
2380
2381 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2382 {
2383 ppc_avr_t result;
2384 int i;
2385
2386 VECTOR_FOR_INORDER_I(i, u8) {
2387 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2388 }
2389 *r = result;
2390 }
2391
2392 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2393 #if defined(HOST_WORDS_BIGENDIAN)
2394 #define EL_IDX(i) (i)
2395 #else
2396 #define EL_IDX(i) (3 - (i))
2397 #endif
2398
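/* vshasigmaw: SHA-256 sigma functions.  st selects the lower-case sigma
 * (st == 0) or upper-case Sigma (st == 1) family; each bit of six picks
 * variant 0 or 1 for the corresponding word. */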
2399 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2400 {
2401 int st = (st_six & 0x10) != 0;
2402 int six = st_six & 0xF;
2403 int i;
2404
2405 VECTOR_FOR_INORDER_I(i, u32) {
2406 if (st == 0) {
2407 if ((six & (0x8 >> i)) == 0) {
2408 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2409 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2410 (a->u32[EL_IDX(i)] >> 3);
2411 } else { /* six.bit[i] == 1 */
2412 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2413 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2414 (a->u32[EL_IDX(i)] >> 10);
2415 }
2416 } else { /* st == 1 */
2417 if ((six & (0x8 >> i)) == 0) {
2418 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2419 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2420 ROTRu32(a->u32[EL_IDX(i)], 22);
2421 } else { /* six.bit[i] == 1 */
2422 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2423 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2424 ROTRu32(a->u32[EL_IDX(i)], 25);
2425 }
2426 }
2427 }
2428 }
2429
2430 #undef ROTRu32
2431 #undef EL_IDX
2432
2433 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2434 #if defined(HOST_WORDS_BIGENDIAN)
2435 #define EL_IDX(i) (i)
2436 #else
2437 #define EL_IDX(i) (1 - (i))
2438 #endif
2439
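/* vshasigmad: SHA-512 sigma functions, selected per doubleword in the same
 * way as for vshasigmaw above. */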
2440 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2441 {
2442 int st = (st_six & 0x10) != 0;
2443 int six = st_six & 0xF;
2444 int i;
2445
2446 VECTOR_FOR_INORDER_I(i, u64) {
2447 if (st == 0) {
2448 if ((six & (0x8 >> (2*i))) == 0) {
2449 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2450 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2451 (a->u64[EL_IDX(i)] >> 7);
2452 } else { /* six.bit[2*i] == 1 */
2453 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2454 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2455 (a->u64[EL_IDX(i)] >> 6);
2456 }
2457 } else { /* st == 1 */
2458 if ((six & (0x8 >> (2*i))) == 0) {
2459 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2460 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2461 ROTRu64(a->u64[EL_IDX(i)], 39);
2462 } else { /* six.bit[2*i] == 1 */
2463 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2464 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2465 ROTRu64(a->u64[EL_IDX(i)], 41);
2466 }
2467 }
2468 }
2469 }
2470
2471 #undef ROTRu64
2472 #undef EL_IDX
2473
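/* vpermxor: each result byte is the byte of a indexed by the high nibble of
 * the corresponding byte of c, XORed with the byte of b indexed by the low
 * nibble. */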
2474 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2475 {
2476 ppc_avr_t result;
2477 int i;
2478
2479 VECTOR_FOR_INORDER_I(i, u8) {
2480 int indexA = c->u8[i] >> 4;
2481 int indexB = c->u8[i] & 0xF;
2482 #if defined(HOST_WORDS_BIGENDIAN)
2483 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2484 #else
2485 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2486 #endif
2487 }
2488 *r = result;
2489 }
2490
2491 #undef VECTOR_FOR_INORDER_I
2492 #undef HI_IDX
2493 #undef LO_IDX
2494
2495 /*****************************************************************************/
2496 /* SPE extension helpers */
2497 /* Use a table to make this quicker */
2498 static const uint8_t hbrev[16] = {
2499 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2500 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2501 };
2502
2503 static inline uint8_t byte_reverse(uint8_t val)
2504 {
2505 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2506 }
2507
2508 static inline uint32_t word_reverse(uint32_t val)
2509 {
2510 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2511 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2512 }
2513
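/* brinc: bit-reversed increment of arg1 under the bit mask given in arg2,
 * as used for FFT-style bit-reversed addressing. */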
2514 #define MASKBITS 16 /* Arbitrary value - implementation dependent, to be fixed */
2515 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2516 {
2517 uint32_t a, b, d, mask;
2518
2519 mask = UINT32_MAX >> (32 - MASKBITS);
2520 a = arg1 & mask;
2521 b = arg2 & mask;
2522 d = word_reverse(1 + word_reverse(a | ~b));
2523 return (arg1 & ~mask) | (d & b);
2524 }
2525
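/* Count the leading bits of val that are equal to its sign bit. */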
2526 uint32_t helper_cntlsw32(uint32_t val)
2527 {
2528 if (val & 0x80000000) {
2529 return clz32(~val);
2530 } else {
2531 return clz32(val);
2532 }
2533 }
2534
2535 uint32_t helper_cntlzw32(uint32_t val)
2536 {
2537 return clz32(val);
2538 }
2539
2540 /* 440 specific */
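/* dlmzb: scan the 8-byte string high:low for the leftmost zero byte and
 * return its 1-based index (8 when no zero byte is found), recording it in
 * the low bits of XER and, when requested, a summary in CR[0]. */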
2541 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2542 target_ulong low, uint32_t update_Rc)
2543 {
2544 target_ulong mask;
2545 int i;
2546
2547 i = 1;
2548 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2549 if ((high & mask) == 0) {
2550 if (update_Rc) {
2551 env->crf[0] = 0x4;
2552 }
2553 goto done;
2554 }
2555 i++;
2556 }
2557 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2558 if ((low & mask) == 0) {
2559 if (update_Rc) {
2560 env->crf[0] = 0x8;
2561 }
2562 goto done;
2563 }
2564 i++;
2565 }
2566 i = 8;
2567 if (update_Rc) {
2568 env->crf[0] = 0x2;
2569 }
2570 done:
2571 env->xer = (env->xer & ~0x7F) | i;
2572 if (update_Rc) {
2573 env->crf[0] |= xer_so;
2574 }
2575 return i;
2576 }