1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "internal.h"
22 #include "exec/exec-all.h"
23 #include "qemu/host-utils.h"
24 #include "exec/helper-proto.h"
25 #include "crypto/aes.h"
26
27 #include "helper_regs.h"
28 /*****************************************************************************/
29 /* Fixed point operations helpers */
30
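/* A brief sketch of the semantics emulated below (illustrative, not an
 * authoritative restatement of the ISA): the 32-bit value in ra is
 * extended with 32 zero bits on the right and divided by the 32-bit
 * value in rb; the quotient is only defined when it fits in 32 bits.
 * E.g. ra = 0x00000001, rb = 0x00000002 gives 0x100000000 / 2 =
 * 0x80000000, which fits, so rt = 0x80000000.
 */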
31 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
32 uint32_t oe)
33 {
34 uint64_t rt = 0;
35 int overflow = 0;
36
37 uint64_t dividend = (uint64_t)ra << 32;
38 uint64_t divisor = (uint32_t)rb;
39
40 if (unlikely(divisor == 0)) {
41 overflow = 1;
42 } else {
43 rt = dividend / divisor;
44 overflow = rt > UINT32_MAX;
45 }
46
47 if (unlikely(overflow)) {
48 rt = 0; /* Undefined */
49 }
50
51 if (oe) {
52 if (unlikely(overflow)) {
53 env->so = env->ov = 1;
54 } else {
55 env->ov = 0;
56 }
57 }
58
59 return (target_ulong)rt;
60 }
61
62 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
63 uint32_t oe)
64 {
65 int64_t rt = 0;
66 int overflow = 0;
67
68 int64_t dividend = (int64_t)ra << 32;
69 int64_t divisor = (int64_t)((int32_t)rb);
70
71 if (unlikely((divisor == 0) ||
72 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
73 overflow = 1;
74 } else {
75 rt = dividend / divisor;
76 overflow = rt != (int32_t)rt;
77 }
78
79 if (unlikely(overflow)) {
80 rt = 0; /* Undefined */
81 }
82
83 if (oe) {
84 if (unlikely(overflow)) {
85 env->so = env->ov = 1;
86 } else {
87 env->ov = 0;
88 }
89 }
90
91 return (target_ulong)rt;
92 }
93
94 #if defined(TARGET_PPC64)
95
96 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97 {
98 uint64_t rt = 0;
99 int overflow = 0;
100
101 overflow = divu128(&rt, &ra, rb);
102
103 if (unlikely(overflow)) {
104 rt = 0; /* Undefined */
105 }
106
107 if (oe) {
108 if (unlikely(overflow)) {
109 env->so = env->ov = 1;
110 } else {
111 env->ov = 0;
112 }
113 }
114
115 return rt;
116 }
117
118 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 {
120 int64_t rt = 0;
121 int64_t ra = (int64_t)rau;
122 int64_t rb = (int64_t)rbu;
123 int overflow = divs128(&rt, &ra, rb);
124
125 if (unlikely(overflow)) {
126 rt = 0; /* Undefined */
127 }
128
129 if (oe) {
130
131 if (unlikely(overflow)) {
132 env->so = env->ov = 1;
133 } else {
134 env->ov = 0;
135 }
136 }
137
138 return rt;
139 }
140
141 #endif
142
143
144 #if defined(TARGET_PPC64)
145 /* if x = 0xab, returns 0xabababababababab */
146 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
147
148 /* subtract 1 from each byte, AND with the inverse, and check whether the MSB
149 * is set in each byte.
150 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
151 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
152 */
153 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
154
155 /* When you XOR the pattern and there is a match, that byte will be zero */
156 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
157
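/* As emulated below, cmpeqb reports a match (1 << CRF_GT) when any byte of
 * rb equals the low byte of ra.  Worked example (illustrative values):
 * with ra = 0x2c and rb = 0x1122334455662c88, rb ^ pattern(0x2c) has a
 * zero byte exactly where rb holds 0x2c, so haszero() fires and the
 * helper returns 1 << CRF_GT; otherwise it returns 0.
 */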
158 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
159 {
160 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
161 }
162
163 #undef pattern
164 #undef haszero
165 #undef hasvalue
166
167 /* Return an invalid random number.
168 *
169 * FIXME: Add an rng backend or other mechanism to get a cryptographically
170 * suitable random number.
171 */
172 target_ulong helper_darn32(void)
173 {
174 return -1;
175 }
176
177 target_ulong helper_darn64(void)
178 {
179 return -1;
180 }
181
182 #endif
183
184 #if defined(TARGET_PPC64)
185
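/* bpermd gathers eight selected bits of rb into the low byte of the result.
 * Each byte of rs holds a bit index in big-endian bit numbering (index 0
 * names the most significant bit of rb); indexes >= 64 contribute a 0 bit.
 * Worked example (illustrative values): if the least significant byte of
 * rs is 63 and the least significant bit of rb is 1, bit 0 of ra is set.
 */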
186 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
187 {
188 int i;
189 uint64_t ra = 0;
190
191 for (i = 0; i < 8; i++) {
192 int index = (rs >> (i*8)) & 0xFF;
193 if (index < 64) {
194 if (rb & (1ull << (63-index))) {
195 ra |= 1 << i;
196 }
197 }
198 }
199 return ra;
200 }
201
202 #endif
203
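/* cmpb compares rs and rb byte by byte: each result byte is 0xff where the
 * corresponding bytes are equal and 0x00 where they differ.  Illustrative
 * example on a 32-bit target: rs = 0x11223344 and rb = 0x112233ff yield
 * ra = 0xffffff00.
 */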
204 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
205 {
206 target_ulong mask = 0xff;
207 target_ulong ra = 0;
208 int i;
209
210 for (i = 0; i < sizeof(target_ulong); i++) {
211 if ((rs & mask) == (rb & mask)) {
212 ra |= mask;
213 }
214 mask <<= 8;
215 }
216 return ra;
217 }
218
219 /* shift right arithmetic helper */
220 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
221 target_ulong shift)
222 {
223 int32_t ret;
224
225 if (likely(!(shift & 0x20))) {
226 if (likely((uint32_t)shift != 0)) {
227 shift &= 0x1f;
228 ret = (int32_t)value >> shift;
229 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
230 env->ca = 0;
231 } else {
232 env->ca = 1;
233 }
234 } else {
235 ret = (int32_t)value;
236 env->ca = 0;
237 }
238 } else {
239 ret = (int32_t)value >> 31;
240 env->ca = (ret != 0);
241 }
242 return (target_long)ret;
243 }
244
245 #if defined(TARGET_PPC64)
246 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
247 target_ulong shift)
248 {
249 int64_t ret;
250
251 if (likely(!(shift & 0x40))) {
252 if (likely((uint64_t)shift != 0)) {
253 shift &= 0x3f;
254 ret = (int64_t)value >> shift;
255 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
256 env->ca = 0;
257 } else {
258 env->ca = 1;
259 }
260 } else {
261 ret = (int64_t)value;
262 env->ca = 0;
263 }
264 } else {
265 ret = (int64_t)value >> 63;
266 env->ca = (ret != 0);
267 }
268 return ret;
269 }
270 #endif
271
272 #if defined(TARGET_PPC64)
273 target_ulong helper_popcntb(target_ulong val)
274 {
275 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
276 0x5555555555555555ULL);
277 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
278 0x3333333333333333ULL);
279 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
280 0x0f0f0f0f0f0f0f0fULL);
281 return val;
282 }
283
284 target_ulong helper_popcntw(target_ulong val)
285 {
286 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
287 0x5555555555555555ULL);
288 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
289 0x3333333333333333ULL);
290 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
291 0x0f0f0f0f0f0f0f0fULL);
292 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
293 0x00ff00ff00ff00ffULL);
294 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
295 0x0000ffff0000ffffULL);
296 return val;
297 }
298
299 target_ulong helper_popcntd(target_ulong val)
300 {
301 return ctpop64(val);
302 }
303 #else
304 target_ulong helper_popcntb(target_ulong val)
305 {
306 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
307 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
308 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
309 return val;
310 }
311
312 target_ulong helper_popcntw(target_ulong val)
313 {
314 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
315 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
316 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
317 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
318 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
319 return val;
320 }
321 #endif
322
323 /*****************************************************************************/
324 /* PowerPC 601 specific instructions (POWER bridge) */
325 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
326 {
327 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328
329 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
330 (int32_t)arg2 == 0) {
331 env->spr[SPR_MQ] = 0;
332 return INT32_MIN;
333 } else {
334 env->spr[SPR_MQ] = tmp % arg2;
335 return tmp / (int32_t)arg2;
336 }
337 }
338
339 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
340 target_ulong arg2)
341 {
342 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
343
344 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
345 (int32_t)arg2 == 0) {
346 env->so = env->ov = 1;
347 env->spr[SPR_MQ] = 0;
348 return INT32_MIN;
349 } else {
350 env->spr[SPR_MQ] = tmp % arg2;
351 tmp /= (int32_t)arg2;
352 if ((int32_t)tmp != tmp) {
353 env->so = env->ov = 1;
354 } else {
355 env->ov = 0;
356 }
357 return tmp;
358 }
359 }
360
361 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
362 target_ulong arg2)
363 {
364 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
365 (int32_t)arg2 == 0) {
366 env->spr[SPR_MQ] = 0;
367 return INT32_MIN;
368 } else {
369 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
370 return (int32_t)arg1 / (int32_t)arg2;
371 }
372 }
373
374 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
375 target_ulong arg2)
376 {
377 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
378 (int32_t)arg2 == 0) {
379 env->so = env->ov = 1;
380 env->spr[SPR_MQ] = 0;
381 return INT32_MIN;
382 } else {
383 env->ov = 0;
384 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
385 return (int32_t)arg1 / (int32_t)arg2;
386 }
387 }
388
389 /*****************************************************************************/
390 /* 602 specific instructions */
391 /* mfrom is the craziest instruction ever seen, imho! */
392 /* The real implementation uses a ROM table; do the same. */
393 /* Each table entry is computed as:
394 * 256 * log10(10^(-arg / 256) + 1.0) + 0.5
395 */
397 #if !defined(CONFIG_USER_ONLY)
398 target_ulong helper_602_mfrom(target_ulong arg)
399 {
400 if (likely(arg < 602)) {
401 #include "mfrom_table.c"
402 return mfrom_ROM_table[arg];
403 } else {
404 return 0;
405 }
406 }
407 #endif
408
409 /*****************************************************************************/
410 /* Altivec extension helpers */
411 #if defined(HOST_WORDS_BIGENDIAN)
412 #define HI_IDX 0
413 #define LO_IDX 1
414 #define AVRB(i) u8[i]
415 #define AVRW(i) u32[i]
416 #else
417 #define HI_IDX 1
418 #define LO_IDX 0
419 #define AVRB(i) u8[15-(i)]
420 #define AVRW(i) u32[3-(i)]
421 #endif
422
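/* VECTOR_FOR_INORDER_I visits vector elements in architectural (big-endian)
 * element order regardless of host byte order: it iterates forwards on
 * big-endian hosts and backwards on little-endian hosts.
 */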
423 #if defined(HOST_WORDS_BIGENDIAN)
424 #define VECTOR_FOR_INORDER_I(index, element) \
425 for (index = 0; index < ARRAY_SIZE(r->element); index++)
426 #else
427 #define VECTOR_FOR_INORDER_I(index, element) \
428 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
429 #endif
430
431 /* Saturating arithmetic helpers. */
432 #define SATCVT(from, to, from_type, to_type, min, max) \
433 static inline to_type cvt##from##to(from_type x, int *sat) \
434 { \
435 to_type r; \
436 \
437 if (x < (from_type)min) { \
438 r = min; \
439 *sat = 1; \
440 } else if (x > (from_type)max) { \
441 r = max; \
442 *sat = 1; \
443 } else { \
444 r = x; \
445 } \
446 return r; \
447 }
448 #define SATCVTU(from, to, from_type, to_type, min, max) \
449 static inline to_type cvt##from##to(from_type x, int *sat) \
450 { \
451 to_type r; \
452 \
453 if (x > (from_type)max) { \
454 r = max; \
455 *sat = 1; \
456 } else { \
457 r = x; \
458 } \
459 return r; \
460 }
461 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
462 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
463 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
464
465 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
466 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
467 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
468 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
469 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
470 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
471 #undef SATCVT
472 #undef SATCVTU
473
474 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
475 {
476 int i, j = (sh & 0xf);
477
478 VECTOR_FOR_INORDER_I(i, u8) {
479 r->u8[i] = j++;
480 }
481 }
482
483 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
484 {
485 int i, j = 0x10 - (sh & 0xf);
486
487 VECTOR_FOR_INORDER_I(i, u8) {
488 r->u8[i] = j++;
489 }
490 }
491
492 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
493 {
494 #if defined(HOST_WORDS_BIGENDIAN)
495 env->vscr = r->u32[3];
496 #else
497 env->vscr = r->u32[0];
498 #endif
499 set_flush_to_zero(vscr_nj, &env->vec_status);
500 }
501
502 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
503 {
504 int i;
505
506 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
507 r->u32[i] = ~a->u32[i] < b->u32[i];
508 }
509 }
510
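/* The vprtyb helpers below compute the parity of the least significant bit
 * of each byte in an element: XOR-folding the element onto itself by halves
 * leaves that parity in bit 0.  Illustrative example for vprtybw: the word
 * 0x00000100 has byte LSBs 0, 1, 0, 0, so the stored result is 1.
 */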
511 /* vprtybw */
512 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
513 {
514 int i;
515 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
516 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
517 res ^= res >> 8;
518 r->u32[i] = res & 1;
519 }
520 }
521
522 /* vprtybd */
523 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
524 {
525 int i;
526 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
527 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
528 res ^= res >> 16;
529 res ^= res >> 8;
530 r->u64[i] = res & 1;
531 }
532 }
533
534 /* vprtybq */
535 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
536 {
537 uint64_t res = b->u64[0] ^ b->u64[1];
538 res ^= res >> 32;
539 res ^= res >> 16;
540 res ^= res >> 8;
541 r->u64[LO_IDX] = res & 1;
542 r->u64[HI_IDX] = 0;
543 }
544
545 #define VARITH_DO(name, op, element) \
546 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
547 { \
548 int i; \
549 \
550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
551 r->element[i] = a->element[i] op b->element[i]; \
552 } \
553 }
554 #define VARITH(suffix, element) \
555 VARITH_DO(add##suffix, +, element) \
556 VARITH_DO(sub##suffix, -, element)
557 VARITH(ubm, u8)
558 VARITH(uhm, u16)
559 VARITH(uwm, u32)
560 VARITH(udm, u64)
561 VARITH_DO(muluwm, *, u32)
562 #undef VARITH_DO
563 #undef VARITH
564
565 #define VARITHFP(suffix, func) \
566 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
567 ppc_avr_t *b) \
568 { \
569 int i; \
570 \
571 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
572 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
573 } \
574 }
575 VARITHFP(addfp, float32_add)
576 VARITHFP(subfp, float32_sub)
577 VARITHFP(minfp, float32_min)
578 VARITHFP(maxfp, float32_max)
579 #undef VARITHFP
580
581 #define VARITHFPFMA(suffix, type) \
582 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
583 ppc_avr_t *b, ppc_avr_t *c) \
584 { \
585 int i; \
586 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
587 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
588 type, &env->vec_status); \
589 } \
590 }
591 VARITHFPFMA(maddfp, 0);
592 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
593 #undef VARITHFPFMA
594
595 #define VARITHSAT_CASE(type, op, cvt, element) \
596 { \
597 type result = (type)a->element[i] op (type)b->element[i]; \
598 r->element[i] = cvt(result, &sat); \
599 }
600
601 #define VARITHSAT_DO(name, op, optype, cvt, element) \
602 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
603 ppc_avr_t *b) \
604 { \
605 int sat = 0; \
606 int i; \
607 \
608 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
609 switch (sizeof(r->element[0])) { \
610 case 1: \
611 VARITHSAT_CASE(optype, op, cvt, element); \
612 break; \
613 case 2: \
614 VARITHSAT_CASE(optype, op, cvt, element); \
615 break; \
616 case 4: \
617 VARITHSAT_CASE(optype, op, cvt, element); \
618 break; \
619 } \
620 } \
621 if (sat) { \
622 env->vscr |= (1 << VSCR_SAT); \
623 } \
624 }
625 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
626 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
627 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
628 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
629 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
630 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
631 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
632 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
633 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
634 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
635 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
636 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
637 #undef VARITHSAT_CASE
638 #undef VARITHSAT_DO
639 #undef VARITHSAT_SIGNED
640 #undef VARITHSAT_UNSIGNED
641
642 #define VAVG_DO(name, element, etype) \
643 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
644 { \
645 int i; \
646 \
647 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
648 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
649 r->element[i] = x >> 1; \
650 } \
651 }
652
653 #define VAVG(type, signed_element, signed_type, unsigned_element, \
654 unsigned_type) \
655 VAVG_DO(avgs##type, signed_element, signed_type) \
656 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
657 VAVG(b, s8, int16_t, u8, uint16_t)
658 VAVG(h, s16, int32_t, u16, uint32_t)
659 VAVG(w, s32, int64_t, u32, uint64_t)
660 #undef VAVG_DO
661 #undef VAVG
662
663 #define VABSDU_DO(name, element) \
664 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
665 { \
666 int i; \
667 \
668 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
669 r->element[i] = (a->element[i] > b->element[i]) ? \
670 (a->element[i] - b->element[i]) : \
671 (b->element[i] - a->element[i]); \
672 } \
673 }
674
675 /* VABSDU - Vector absolute difference unsigned
676 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
677 * element - element type to access from vector
678 */
679 #define VABSDU(type, element) \
680 VABSDU_DO(absdu##type, element)
681 VABSDU(b, u8)
682 VABSDU(h, u16)
683 VABSDU(w, u32)
684 #undef VABSDU_DO
685 #undef VABSDU
686
687 #define VCF(suffix, cvt, element) \
688 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
689 ppc_avr_t *b, uint32_t uim) \
690 { \
691 int i; \
692 \
693 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
694 float32 t = cvt(b->element[i], &env->vec_status); \
695 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
696 } \
697 }
698 VCF(ux, uint32_to_float32, u32)
699 VCF(sx, int32_to_float32, s32)
700 #undef VCF
701
702 #define VCMP_DO(suffix, compare, element, record) \
703 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
704 ppc_avr_t *a, ppc_avr_t *b) \
705 { \
706 uint64_t ones = (uint64_t)-1; \
707 uint64_t all = ones; \
708 uint64_t none = 0; \
709 int i; \
710 \
711 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
712 uint64_t result = (a->element[i] compare b->element[i] ? \
713 ones : 0x0); \
714 switch (sizeof(a->element[0])) { \
715 case 8: \
716 r->u64[i] = result; \
717 break; \
718 case 4: \
719 r->u32[i] = result; \
720 break; \
721 case 2: \
722 r->u16[i] = result; \
723 break; \
724 case 1: \
725 r->u8[i] = result; \
726 break; \
727 } \
728 all &= result; \
729 none |= result; \
730 } \
731 if (record) { \
732 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
733 } \
734 }
735 #define VCMP(suffix, compare, element) \
736 VCMP_DO(suffix, compare, element, 0) \
737 VCMP_DO(suffix##_dot, compare, element, 1)
738 VCMP(equb, ==, u8)
739 VCMP(equh, ==, u16)
740 VCMP(equw, ==, u32)
741 VCMP(equd, ==, u64)
742 VCMP(gtub, >, u8)
743 VCMP(gtuh, >, u16)
744 VCMP(gtuw, >, u32)
745 VCMP(gtud, >, u64)
746 VCMP(gtsb, >, s8)
747 VCMP(gtsh, >, s16)
748 VCMP(gtsw, >, s32)
749 VCMP(gtsd, >, s64)
750 #undef VCMP_DO
751 #undef VCMP
752
753 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
754 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
755 ppc_avr_t *a, ppc_avr_t *b) \
756 { \
757 etype ones = (etype)-1; \
758 etype all = ones; \
759 etype result, none = 0; \
760 int i; \
761 \
762 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
763 if (cmpzero) { \
764 result = ((a->element[i] == 0) \
765 || (b->element[i] == 0) \
766 || (a->element[i] != b->element[i]) ? \
767 ones : 0x0); \
768 } else { \
769 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
770 } \
771 r->element[i] = result; \
772 all &= result; \
773 none |= result; \
774 } \
775 if (record) { \
776 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
777 } \
778 }
779
780 /* VCMPNEZ - Vector compare not equal to zero
781 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
782 * element - element type to access from vector
783 */
784 #define VCMPNE(suffix, element, etype, cmpzero) \
785 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
786 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
787 VCMPNE(zb, u8, uint8_t, 1)
788 VCMPNE(zh, u16, uint16_t, 1)
789 VCMPNE(zw, u32, uint32_t, 1)
790 VCMPNE(b, u8, uint8_t, 0)
791 VCMPNE(h, u16, uint16_t, 0)
792 VCMPNE(w, u32, uint32_t, 0)
793 #undef VCMPNE_DO
794 #undef VCMPNE
795
796 #define VCMPFP_DO(suffix, compare, order, record) \
797 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
798 ppc_avr_t *a, ppc_avr_t *b) \
799 { \
800 uint32_t ones = (uint32_t)-1; \
801 uint32_t all = ones; \
802 uint32_t none = 0; \
803 int i; \
804 \
805 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
806 uint32_t result; \
807 int rel = float32_compare_quiet(a->f[i], b->f[i], \
808 &env->vec_status); \
809 if (rel == float_relation_unordered) { \
810 result = 0; \
811 } else if (rel compare order) { \
812 result = ones; \
813 } else { \
814 result = 0; \
815 } \
816 r->u32[i] = result; \
817 all &= result; \
818 none |= result; \
819 } \
820 if (record) { \
821 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
822 } \
823 }
824 #define VCMPFP(suffix, compare, order) \
825 VCMPFP_DO(suffix, compare, order, 0) \
826 VCMPFP_DO(suffix##_dot, compare, order, 1)
827 VCMPFP(eqfp, ==, float_relation_equal)
828 VCMPFP(gefp, !=, float_relation_less)
829 VCMPFP(gtfp, ==, float_relation_greater)
830 #undef VCMPFP_DO
831 #undef VCMPFP
832
833 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
834 ppc_avr_t *a, ppc_avr_t *b, int record)
835 {
836 int i;
837 int all_in = 0;
838
839 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
840 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
841 if (le_rel == float_relation_unordered) {
842 r->u32[i] = 0xc0000000;
843 all_in = 1;
844 } else {
845 float32 bneg = float32_chs(b->f[i]);
846 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
847 int le = le_rel != float_relation_greater;
848 int ge = ge_rel != float_relation_less;
849
850 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
851 all_in |= (!le | !ge);
852 }
853 }
854 if (record) {
855 env->crf[6] = (all_in == 0) << 1;
856 }
857 }
858
859 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
860 {
861 vcmpbfp_internal(env, r, a, b, 0);
862 }
863
864 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
865 ppc_avr_t *b)
866 {
867 vcmpbfp_internal(env, r, a, b, 1);
868 }
869
870 #define VCT(suffix, satcvt, element) \
871 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
872 ppc_avr_t *b, uint32_t uim) \
873 { \
874 int i; \
875 int sat = 0; \
876 float_status s = env->vec_status; \
877 \
878 set_float_rounding_mode(float_round_to_zero, &s); \
879 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
880 if (float32_is_any_nan(b->f[i])) { \
881 r->element[i] = 0; \
882 } else { \
883 float64 t = float32_to_float64(b->f[i], &s); \
884 int64_t j; \
885 \
886 t = float64_scalbn(t, uim, &s); \
887 j = float64_to_int64(t, &s); \
888 r->element[i] = satcvt(j, &sat); \
889 } \
890 } \
891 if (sat) { \
892 env->vscr |= (1 << VSCR_SAT); \
893 } \
894 }
895 VCT(uxs, cvtsduw, u32)
896 VCT(sxs, cvtsdsw, s32)
897 #undef VCT
898
899 target_ulong helper_vclzlsbb(ppc_avr_t *r)
900 {
901 target_ulong count = 0;
902 int i;
903 VECTOR_FOR_INORDER_I(i, u8) {
904 if (r->u8[i] & 0x01) {
905 break;
906 }
907 count++;
908 }
909 return count;
910 }
911
912 target_ulong helper_vctzlsbb(ppc_avr_t *r)
913 {
914 target_ulong count = 0;
915 int i;
916 #if defined(HOST_WORDS_BIGENDIAN)
917 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
918 #else
919 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
920 #endif
921 if (r->u8[i] & 0x01) {
922 break;
923 }
924 count++;
925 }
926 return count;
927 }
928
929 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
930 ppc_avr_t *b, ppc_avr_t *c)
931 {
932 int sat = 0;
933 int i;
934
935 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
936 int32_t prod = a->s16[i] * b->s16[i];
937 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
938
939 r->s16[i] = cvtswsh(t, &sat);
940 }
941
942 if (sat) {
943 env->vscr |= (1 << VSCR_SAT);
944 }
945 }
946
947 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
949 {
950 int sat = 0;
951 int i;
952
953 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
954 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
955 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
956 r->s16[i] = cvtswsh(t, &sat);
957 }
958
959 if (sat) {
960 env->vscr |= (1 << VSCR_SAT);
961 }
962 }
963
964 #define VMINMAX_DO(name, compare, element) \
965 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
966 { \
967 int i; \
968 \
969 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
970 if (a->element[i] compare b->element[i]) { \
971 r->element[i] = b->element[i]; \
972 } else { \
973 r->element[i] = a->element[i]; \
974 } \
975 } \
976 }
977 #define VMINMAX(suffix, element) \
978 VMINMAX_DO(min##suffix, >, element) \
979 VMINMAX_DO(max##suffix, <, element)
980 VMINMAX(sb, s8)
981 VMINMAX(sh, s16)
982 VMINMAX(sw, s32)
983 VMINMAX(sd, s64)
984 VMINMAX(ub, u8)
985 VMINMAX(uh, u16)
986 VMINMAX(uw, u32)
987 VMINMAX(ud, u64)
988 #undef VMINMAX_DO
989 #undef VMINMAX
990
991 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
992 {
993 int i;
994
995 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
996 int32_t prod = a->s16[i] * b->s16[i];
997 r->s16[i] = (int16_t) (prod + c->s16[i]);
998 }
999 }
1000
1001 #define VMRG_DO(name, element, highp) \
1002 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1003 { \
1004 ppc_avr_t result; \
1005 int i; \
1006 size_t n_elems = ARRAY_SIZE(r->element); \
1007 \
1008 for (i = 0; i < n_elems / 2; i++) { \
1009 if (highp) { \
1010 result.element[i*2+HI_IDX] = a->element[i]; \
1011 result.element[i*2+LO_IDX] = b->element[i]; \
1012 } else { \
1013 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
1014 b->element[n_elems - i - 1]; \
1015 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
1016 a->element[n_elems - i - 1]; \
1017 } \
1018 } \
1019 *r = result; \
1020 }
1021 #if defined(HOST_WORDS_BIGENDIAN)
1022 #define MRGHI 0
1023 #define MRGLO 1
1024 #else
1025 #define MRGHI 1
1026 #define MRGLO 0
1027 #endif
1028 #define VMRG(suffix, element) \
1029 VMRG_DO(mrgl##suffix, element, MRGHI) \
1030 VMRG_DO(mrgh##suffix, element, MRGLO)
1031 VMRG(b, u8)
1032 VMRG(h, u16)
1033 VMRG(w, u32)
1034 #undef VMRG_DO
1035 #undef VMRG
1036 #undef MRGHI
1037 #undef MRGLO
1038
1039 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1040 ppc_avr_t *b, ppc_avr_t *c)
1041 {
1042 int32_t prod[16];
1043 int i;
1044
1045 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1046 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1047 }
1048
1049 VECTOR_FOR_INORDER_I(i, s32) {
1050 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1051 prod[4 * i + 2] + prod[4 * i + 3];
1052 }
1053 }
1054
1055 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1056 ppc_avr_t *b, ppc_avr_t *c)
1057 {
1058 int32_t prod[8];
1059 int i;
1060
1061 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1062 prod[i] = a->s16[i] * b->s16[i];
1063 }
1064
1065 VECTOR_FOR_INORDER_I(i, s32) {
1066 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1067 }
1068 }
1069
1070 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1071 ppc_avr_t *b, ppc_avr_t *c)
1072 {
1073 int32_t prod[8];
1074 int i;
1075 int sat = 0;
1076
1077 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1078 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1079 }
1080
1081 VECTOR_FOR_INORDER_I(i, s32) {
1082 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1083
1084 r->u32[i] = cvtsdsw(t, &sat);
1085 }
1086
1087 if (sat) {
1088 env->vscr |= (1 << VSCR_SAT);
1089 }
1090 }
1091
1092 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1093 ppc_avr_t *b, ppc_avr_t *c)
1094 {
1095 uint16_t prod[16];
1096 int i;
1097
1098 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1099 prod[i] = a->u8[i] * b->u8[i];
1100 }
1101
1102 VECTOR_FOR_INORDER_I(i, u32) {
1103 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1104 prod[4 * i + 2] + prod[4 * i + 3];
1105 }
1106 }
1107
1108 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1109 ppc_avr_t *b, ppc_avr_t *c)
1110 {
1111 uint32_t prod[8];
1112 int i;
1113
1114 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1115 prod[i] = a->u16[i] * b->u16[i];
1116 }
1117
1118 VECTOR_FOR_INORDER_I(i, u32) {
1119 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1120 }
1121 }
1122
1123 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1124 ppc_avr_t *b, ppc_avr_t *c)
1125 {
1126 uint32_t prod[8];
1127 int i;
1128 int sat = 0;
1129
1130 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1131 prod[i] = a->u16[i] * b->u16[i];
1132 }
1133
1134 VECTOR_FOR_INORDER_I(i, s32) {
1135 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1136
1137 r->u32[i] = cvtuduw(t, &sat);
1138 }
1139
1140 if (sat) {
1141 env->vscr |= (1 << VSCR_SAT);
1142 }
1143 }
1144
1145 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1146 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1147 { \
1148 int i; \
1149 \
1150 VECTOR_FOR_INORDER_I(i, prod_element) { \
1151 if (evenp) { \
1152 r->prod_element[i] = \
1153 (cast)a->mul_element[i * 2 + HI_IDX] * \
1154 (cast)b->mul_element[i * 2 + HI_IDX]; \
1155 } else { \
1156 r->prod_element[i] = \
1157 (cast)a->mul_element[i * 2 + LO_IDX] * \
1158 (cast)b->mul_element[i * 2 + LO_IDX]; \
1159 } \
1160 } \
1161 }
1162 #define VMUL(suffix, mul_element, prod_element, cast) \
1163 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1164 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1165 VMUL(sb, s8, s16, int16_t)
1166 VMUL(sh, s16, s32, int32_t)
1167 VMUL(sw, s32, s64, int64_t)
1168 VMUL(ub, u8, u16, uint16_t)
1169 VMUL(uh, u16, u32, uint32_t)
1170 VMUL(uw, u32, u64, uint64_t)
1171 #undef VMUL_DO
1172 #undef VMUL
1173
1174 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1175 ppc_avr_t *c)
1176 {
1177 ppc_avr_t result;
1178 int i;
1179
1180 VECTOR_FOR_INORDER_I(i, u8) {
1181 int s = c->u8[i] & 0x1f;
1182 #if defined(HOST_WORDS_BIGENDIAN)
1183 int index = s & 0xf;
1184 #else
1185 int index = 15 - (s & 0xf);
1186 #endif
1187
1188 if (s & 0x10) {
1189 result.u8[i] = b->u8[index];
1190 } else {
1191 result.u8[i] = a->u8[index];
1192 }
1193 }
1194 *r = result;
1195 }
1196
1197 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1198 ppc_avr_t *c)
1199 {
1200 ppc_avr_t result;
1201 int i;
1202
1203 VECTOR_FOR_INORDER_I(i, u8) {
1204 int s = c->u8[i] & 0x1f;
1205 #if defined(HOST_WORDS_BIGENDIAN)
1206 int index = 15 - (s & 0xf);
1207 #else
1208 int index = s & 0xf;
1209 #endif
1210
1211 if (s & 0x10) {
1212 result.u8[i] = a->u8[index];
1213 } else {
1214 result.u8[i] = b->u8[index];
1215 }
1216 }
1217 *r = result;
1218 }
1219
1220 #if defined(HOST_WORDS_BIGENDIAN)
1221 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1222 #define VBPERMD_INDEX(i) (i)
1223 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1224 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1225 #else
1226 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1227 #define VBPERMD_INDEX(i) (1 - i)
1228 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1229 #define EXTRACT_BIT(avr, i, index) \
1230 (extract64((avr)->u64[1 - i], 63 - index, 1))
1231 #endif
1232
1233 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1234 {
1235 int i, j;
1236 ppc_avr_t result = { .u64 = { 0, 0 } };
1237 VECTOR_FOR_INORDER_I(i, u64) {
1238 for (j = 0; j < 8; j++) {
1239 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1240 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1241 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1242 }
1243 }
1244 }
1245 *r = result;
1246 }
1247
1248 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1249 {
1250 int i;
1251 uint64_t perm = 0;
1252
1253 VECTOR_FOR_INORDER_I(i, u8) {
1254 int index = VBPERMQ_INDEX(b, i);
1255
1256 if (index < 128) {
1257 uint64_t mask = (1ull << (63-(index & 0x3F)));
1258 if (a->u64[VBPERMQ_DW(index)] & mask) {
1259 perm |= (0x8000 >> i);
1260 }
1261 }
1262 }
1263
1264 r->u64[HI_IDX] = perm;
1265 r->u64[LO_IDX] = 0;
1266 }
1267
1268 #undef VBPERMQ_INDEX
1269 #undef VBPERMQ_DW
1270
1271 static const uint64_t VGBBD_MASKS[256] = {
1272 0x0000000000000000ull, /* 00 */
1273 0x0000000000000080ull, /* 01 */
1274 0x0000000000008000ull, /* 02 */
1275 0x0000000000008080ull, /* 03 */
1276 0x0000000000800000ull, /* 04 */
1277 0x0000000000800080ull, /* 05 */
1278 0x0000000000808000ull, /* 06 */
1279 0x0000000000808080ull, /* 07 */
1280 0x0000000080000000ull, /* 08 */
1281 0x0000000080000080ull, /* 09 */
1282 0x0000000080008000ull, /* 0A */
1283 0x0000000080008080ull, /* 0B */
1284 0x0000000080800000ull, /* 0C */
1285 0x0000000080800080ull, /* 0D */
1286 0x0000000080808000ull, /* 0E */
1287 0x0000000080808080ull, /* 0F */
1288 0x0000008000000000ull, /* 10 */
1289 0x0000008000000080ull, /* 11 */
1290 0x0000008000008000ull, /* 12 */
1291 0x0000008000008080ull, /* 13 */
1292 0x0000008000800000ull, /* 14 */
1293 0x0000008000800080ull, /* 15 */
1294 0x0000008000808000ull, /* 16 */
1295 0x0000008000808080ull, /* 17 */
1296 0x0000008080000000ull, /* 18 */
1297 0x0000008080000080ull, /* 19 */
1298 0x0000008080008000ull, /* 1A */
1299 0x0000008080008080ull, /* 1B */
1300 0x0000008080800000ull, /* 1C */
1301 0x0000008080800080ull, /* 1D */
1302 0x0000008080808000ull, /* 1E */
1303 0x0000008080808080ull, /* 1F */
1304 0x0000800000000000ull, /* 20 */
1305 0x0000800000000080ull, /* 21 */
1306 0x0000800000008000ull, /* 22 */
1307 0x0000800000008080ull, /* 23 */
1308 0x0000800000800000ull, /* 24 */
1309 0x0000800000800080ull, /* 25 */
1310 0x0000800000808000ull, /* 26 */
1311 0x0000800000808080ull, /* 27 */
1312 0x0000800080000000ull, /* 28 */
1313 0x0000800080000080ull, /* 29 */
1314 0x0000800080008000ull, /* 2A */
1315 0x0000800080008080ull, /* 2B */
1316 0x0000800080800000ull, /* 2C */
1317 0x0000800080800080ull, /* 2D */
1318 0x0000800080808000ull, /* 2E */
1319 0x0000800080808080ull, /* 2F */
1320 0x0000808000000000ull, /* 30 */
1321 0x0000808000000080ull, /* 31 */
1322 0x0000808000008000ull, /* 32 */
1323 0x0000808000008080ull, /* 33 */
1324 0x0000808000800000ull, /* 34 */
1325 0x0000808000800080ull, /* 35 */
1326 0x0000808000808000ull, /* 36 */
1327 0x0000808000808080ull, /* 37 */
1328 0x0000808080000000ull, /* 38 */
1329 0x0000808080000080ull, /* 39 */
1330 0x0000808080008000ull, /* 3A */
1331 0x0000808080008080ull, /* 3B */
1332 0x0000808080800000ull, /* 3C */
1333 0x0000808080800080ull, /* 3D */
1334 0x0000808080808000ull, /* 3E */
1335 0x0000808080808080ull, /* 3F */
1336 0x0080000000000000ull, /* 40 */
1337 0x0080000000000080ull, /* 41 */
1338 0x0080000000008000ull, /* 42 */
1339 0x0080000000008080ull, /* 43 */
1340 0x0080000000800000ull, /* 44 */
1341 0x0080000000800080ull, /* 45 */
1342 0x0080000000808000ull, /* 46 */
1343 0x0080000000808080ull, /* 47 */
1344 0x0080000080000000ull, /* 48 */
1345 0x0080000080000080ull, /* 49 */
1346 0x0080000080008000ull, /* 4A */
1347 0x0080000080008080ull, /* 4B */
1348 0x0080000080800000ull, /* 4C */
1349 0x0080000080800080ull, /* 4D */
1350 0x0080000080808000ull, /* 4E */
1351 0x0080000080808080ull, /* 4F */
1352 0x0080008000000000ull, /* 50 */
1353 0x0080008000000080ull, /* 51 */
1354 0x0080008000008000ull, /* 52 */
1355 0x0080008000008080ull, /* 53 */
1356 0x0080008000800000ull, /* 54 */
1357 0x0080008000800080ull, /* 55 */
1358 0x0080008000808000ull, /* 56 */
1359 0x0080008000808080ull, /* 57 */
1360 0x0080008080000000ull, /* 58 */
1361 0x0080008080000080ull, /* 59 */
1362 0x0080008080008000ull, /* 5A */
1363 0x0080008080008080ull, /* 5B */
1364 0x0080008080800000ull, /* 5C */
1365 0x0080008080800080ull, /* 5D */
1366 0x0080008080808000ull, /* 5E */
1367 0x0080008080808080ull, /* 5F */
1368 0x0080800000000000ull, /* 60 */
1369 0x0080800000000080ull, /* 61 */
1370 0x0080800000008000ull, /* 62 */
1371 0x0080800000008080ull, /* 63 */
1372 0x0080800000800000ull, /* 64 */
1373 0x0080800000800080ull, /* 65 */
1374 0x0080800000808000ull, /* 66 */
1375 0x0080800000808080ull, /* 67 */
1376 0x0080800080000000ull, /* 68 */
1377 0x0080800080000080ull, /* 69 */
1378 0x0080800080008000ull, /* 6A */
1379 0x0080800080008080ull, /* 6B */
1380 0x0080800080800000ull, /* 6C */
1381 0x0080800080800080ull, /* 6D */
1382 0x0080800080808000ull, /* 6E */
1383 0x0080800080808080ull, /* 6F */
1384 0x0080808000000000ull, /* 70 */
1385 0x0080808000000080ull, /* 71 */
1386 0x0080808000008000ull, /* 72 */
1387 0x0080808000008080ull, /* 73 */
1388 0x0080808000800000ull, /* 74 */
1389 0x0080808000800080ull, /* 75 */
1390 0x0080808000808000ull, /* 76 */
1391 0x0080808000808080ull, /* 77 */
1392 0x0080808080000000ull, /* 78 */
1393 0x0080808080000080ull, /* 79 */
1394 0x0080808080008000ull, /* 7A */
1395 0x0080808080008080ull, /* 7B */
1396 0x0080808080800000ull, /* 7C */
1397 0x0080808080800080ull, /* 7D */
1398 0x0080808080808000ull, /* 7E */
1399 0x0080808080808080ull, /* 7F */
1400 0x8000000000000000ull, /* 80 */
1401 0x8000000000000080ull, /* 81 */
1402 0x8000000000008000ull, /* 82 */
1403 0x8000000000008080ull, /* 83 */
1404 0x8000000000800000ull, /* 84 */
1405 0x8000000000800080ull, /* 85 */
1406 0x8000000000808000ull, /* 86 */
1407 0x8000000000808080ull, /* 87 */
1408 0x8000000080000000ull, /* 88 */
1409 0x8000000080000080ull, /* 89 */
1410 0x8000000080008000ull, /* 8A */
1411 0x8000000080008080ull, /* 8B */
1412 0x8000000080800000ull, /* 8C */
1413 0x8000000080800080ull, /* 8D */
1414 0x8000000080808000ull, /* 8E */
1415 0x8000000080808080ull, /* 8F */
1416 0x8000008000000000ull, /* 90 */
1417 0x8000008000000080ull, /* 91 */
1418 0x8000008000008000ull, /* 92 */
1419 0x8000008000008080ull, /* 93 */
1420 0x8000008000800000ull, /* 94 */
1421 0x8000008000800080ull, /* 95 */
1422 0x8000008000808000ull, /* 96 */
1423 0x8000008000808080ull, /* 97 */
1424 0x8000008080000000ull, /* 98 */
1425 0x8000008080000080ull, /* 99 */
1426 0x8000008080008000ull, /* 9A */
1427 0x8000008080008080ull, /* 9B */
1428 0x8000008080800000ull, /* 9C */
1429 0x8000008080800080ull, /* 9D */
1430 0x8000008080808000ull, /* 9E */
1431 0x8000008080808080ull, /* 9F */
1432 0x8000800000000000ull, /* A0 */
1433 0x8000800000000080ull, /* A1 */
1434 0x8000800000008000ull, /* A2 */
1435 0x8000800000008080ull, /* A3 */
1436 0x8000800000800000ull, /* A4 */
1437 0x8000800000800080ull, /* A5 */
1438 0x8000800000808000ull, /* A6 */
1439 0x8000800000808080ull, /* A7 */
1440 0x8000800080000000ull, /* A8 */
1441 0x8000800080000080ull, /* A9 */
1442 0x8000800080008000ull, /* AA */
1443 0x8000800080008080ull, /* AB */
1444 0x8000800080800000ull, /* AC */
1445 0x8000800080800080ull, /* AD */
1446 0x8000800080808000ull, /* AE */
1447 0x8000800080808080ull, /* AF */
1448 0x8000808000000000ull, /* B0 */
1449 0x8000808000000080ull, /* B1 */
1450 0x8000808000008000ull, /* B2 */
1451 0x8000808000008080ull, /* B3 */
1452 0x8000808000800000ull, /* B4 */
1453 0x8000808000800080ull, /* B5 */
1454 0x8000808000808000ull, /* B6 */
1455 0x8000808000808080ull, /* B7 */
1456 0x8000808080000000ull, /* B8 */
1457 0x8000808080000080ull, /* B9 */
1458 0x8000808080008000ull, /* BA */
1459 0x8000808080008080ull, /* BB */
1460 0x8000808080800000ull, /* BC */
1461 0x8000808080800080ull, /* BD */
1462 0x8000808080808000ull, /* BE */
1463 0x8000808080808080ull, /* BF */
1464 0x8080000000000000ull, /* C0 */
1465 0x8080000000000080ull, /* C1 */
1466 0x8080000000008000ull, /* C2 */
1467 0x8080000000008080ull, /* C3 */
1468 0x8080000000800000ull, /* C4 */
1469 0x8080000000800080ull, /* C5 */
1470 0x8080000000808000ull, /* C6 */
1471 0x8080000000808080ull, /* C7 */
1472 0x8080000080000000ull, /* C8 */
1473 0x8080000080000080ull, /* C9 */
1474 0x8080000080008000ull, /* CA */
1475 0x8080000080008080ull, /* CB */
1476 0x8080000080800000ull, /* CC */
1477 0x8080000080800080ull, /* CD */
1478 0x8080000080808000ull, /* CE */
1479 0x8080000080808080ull, /* CF */
1480 0x8080008000000000ull, /* D0 */
1481 0x8080008000000080ull, /* D1 */
1482 0x8080008000008000ull, /* D2 */
1483 0x8080008000008080ull, /* D3 */
1484 0x8080008000800000ull, /* D4 */
1485 0x8080008000800080ull, /* D5 */
1486 0x8080008000808000ull, /* D6 */
1487 0x8080008000808080ull, /* D7 */
1488 0x8080008080000000ull, /* D8 */
1489 0x8080008080000080ull, /* D9 */
1490 0x8080008080008000ull, /* DA */
1491 0x8080008080008080ull, /* DB */
1492 0x8080008080800000ull, /* DC */
1493 0x8080008080800080ull, /* DD */
1494 0x8080008080808000ull, /* DE */
1495 0x8080008080808080ull, /* DF */
1496 0x8080800000000000ull, /* E0 */
1497 0x8080800000000080ull, /* E1 */
1498 0x8080800000008000ull, /* E2 */
1499 0x8080800000008080ull, /* E3 */
1500 0x8080800000800000ull, /* E4 */
1501 0x8080800000800080ull, /* E5 */
1502 0x8080800000808000ull, /* E6 */
1503 0x8080800000808080ull, /* E7 */
1504 0x8080800080000000ull, /* E8 */
1505 0x8080800080000080ull, /* E9 */
1506 0x8080800080008000ull, /* EA */
1507 0x8080800080008080ull, /* EB */
1508 0x8080800080800000ull, /* EC */
1509 0x8080800080800080ull, /* ED */
1510 0x8080800080808000ull, /* EE */
1511 0x8080800080808080ull, /* EF */
1512 0x8080808000000000ull, /* F0 */
1513 0x8080808000000080ull, /* F1 */
1514 0x8080808000008000ull, /* F2 */
1515 0x8080808000008080ull, /* F3 */
1516 0x8080808000800000ull, /* F4 */
1517 0x8080808000800080ull, /* F5 */
1518 0x8080808000808000ull, /* F6 */
1519 0x8080808000808080ull, /* F7 */
1520 0x8080808080000000ull, /* F8 */
1521 0x8080808080000080ull, /* F9 */
1522 0x8080808080008000ull, /* FA */
1523 0x8080808080008080ull, /* FB */
1524 0x8080808080800000ull, /* FC */
1525 0x8080808080800080ull, /* FD */
1526 0x8080808080808000ull, /* FE */
1527 0x8080808080808080ull, /* FF */
1528 };
1529
1530 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1531 {
1532 int i;
1533 uint64_t t[2] = { 0, 0 };
1534
1535 VECTOR_FOR_INORDER_I(i, u8) {
1536 #if defined(HOST_WORDS_BIGENDIAN)
1537 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1538 #else
1539 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1540 #endif
1541 }
1542
1543 r->u64[0] = t[0];
1544 r->u64[1] = t[1];
1545 }
1546
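/* The PMSUM helpers implement polynomial (carry-less) multiply-sum: partial
 * products are combined with XOR rather than addition, and the two products
 * of each element pair are XORed into one result element.  Illustrative
 * example for vpmsumb: 0x03 * 0x03 = 0x05 (not 0x09), since
 * (x + 1) * (x + 1) = x^2 + 1 over GF(2).
 */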
1547 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1548 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1549 { \
1550 int i, j; \
1551 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1552 \
1553 VECTOR_FOR_INORDER_I(i, srcfld) { \
1554 prod[i] = 0; \
1555 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1556 if (a->srcfld[i] & (1ull<<j)) { \
1557 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1558 } \
1559 } \
1560 } \
1561 \
1562 VECTOR_FOR_INORDER_I(i, trgfld) { \
1563 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1564 } \
1565 }
1566
1567 PMSUM(vpmsumb, u8, u16, uint16_t)
1568 PMSUM(vpmsumh, u16, u32, uint32_t)
1569 PMSUM(vpmsumw, u32, u64, uint64_t)
1570
1571 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1572 {
1573
1574 #ifdef CONFIG_INT128
1575 int i, j;
1576 __uint128_t prod[2];
1577
1578 VECTOR_FOR_INORDER_I(i, u64) {
1579 prod[i] = 0;
1580 for (j = 0; j < 64; j++) {
1581 if (a->u64[i] & (1ull<<j)) {
1582 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1583 }
1584 }
1585 }
1586
1587 r->u128 = prod[0] ^ prod[1];
1588
1589 #else
1590 int i, j;
1591 ppc_avr_t prod[2];
1592
1593 VECTOR_FOR_INORDER_I(i, u64) {
1594 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1595 for (j = 0; j < 64; j++) {
1596 if (a->u64[i] & (1ull<<j)) {
1597 ppc_avr_t bshift;
1598 if (j == 0) {
1599 bshift.u64[HI_IDX] = 0;
1600 bshift.u64[LO_IDX] = b->u64[i];
1601 } else {
1602 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1603 bshift.u64[LO_IDX] = b->u64[i] << j;
1604 }
1605 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1606 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1607 }
1608 }
1609 }
1610
1611 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1612 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1613 #endif
1614 }
1615
1616
1617 #if defined(HOST_WORDS_BIGENDIAN)
1618 #define PKBIG 1
1619 #else
1620 #define PKBIG 0
1621 #endif
1622 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1623 {
1624 int i, j;
1625 ppc_avr_t result;
1626 #if defined(HOST_WORDS_BIGENDIAN)
1627 const ppc_avr_t *x[2] = { a, b };
1628 #else
1629 const ppc_avr_t *x[2] = { b, a };
1630 #endif
1631
1632 VECTOR_FOR_INORDER_I(i, u64) {
1633 VECTOR_FOR_INORDER_I(j, u32) {
1634 uint32_t e = x[i]->u32[j];
1635
1636 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1637 ((e >> 6) & 0x3e0) |
1638 ((e >> 3) & 0x1f));
1639 }
1640 }
1641 *r = result;
1642 }
1643
1644 #define VPK(suffix, from, to, cvt, dosat) \
1645 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1646 ppc_avr_t *a, ppc_avr_t *b) \
1647 { \
1648 int i; \
1649 int sat = 0; \
1650 ppc_avr_t result; \
1651 ppc_avr_t *a0 = PKBIG ? a : b; \
1652 ppc_avr_t *a1 = PKBIG ? b : a; \
1653 \
1654 VECTOR_FOR_INORDER_I(i, from) { \
1655 result.to[i] = cvt(a0->from[i], &sat); \
1656 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1657 } \
1658 *r = result; \
1659 if (dosat && sat) { \
1660 env->vscr |= (1 << VSCR_SAT); \
1661 } \
1662 }
1663 #define I(x, y) (x)
1664 VPK(shss, s16, s8, cvtshsb, 1)
1665 VPK(shus, s16, u8, cvtshub, 1)
1666 VPK(swss, s32, s16, cvtswsh, 1)
1667 VPK(swus, s32, u16, cvtswuh, 1)
1668 VPK(sdss, s64, s32, cvtsdsw, 1)
1669 VPK(sdus, s64, u32, cvtsduw, 1)
1670 VPK(uhus, u16, u8, cvtuhub, 1)
1671 VPK(uwus, u32, u16, cvtuwuh, 1)
1672 VPK(udus, u64, u32, cvtuduw, 1)
1673 VPK(uhum, u16, u8, I, 0)
1674 VPK(uwum, u32, u16, I, 0)
1675 VPK(udum, u64, u32, I, 0)
1676 #undef I
1677 #undef VPK
1678 #undef PKBIG
1679
1680 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1681 {
1682 int i;
1683
1684 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1685 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1686 }
1687 }
1688
1689 #define VRFI(suffix, rounding) \
1690 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1691 ppc_avr_t *b) \
1692 { \
1693 int i; \
1694 float_status s = env->vec_status; \
1695 \
1696 set_float_rounding_mode(rounding, &s); \
1697 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1698 r->f[i] = float32_round_to_int (b->f[i], &s); \
1699 } \
1700 }
1701 VRFI(n, float_round_nearest_even)
1702 VRFI(m, float_round_down)
1703 VRFI(p, float_round_up)
1704 VRFI(z, float_round_to_zero)
1705 #undef VRFI
1706
1707 #define VROTATE(suffix, element, mask) \
1708 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1709 { \
1710 int i; \
1711 \
1712 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1713 unsigned int shift = b->element[i] & mask; \
1714 r->element[i] = (a->element[i] << shift) | \
1715 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1716 } \
1717 }
1718 VROTATE(b, u8, 0x7)
1719 VROTATE(h, u16, 0xF)
1720 VROTATE(w, u32, 0x1F)
1721 VROTATE(d, u64, 0x3F)
1722 #undef VROTATE
1723
1724 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1725 {
1726 int i;
1727
1728 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1729 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1730
1731 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1732 }
1733 }
1734
1735 #define VRLMI(name, size, element, insert) \
1736 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1737 { \
1738 int i; \
1739 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1740 uint##size##_t src1 = a->element[i]; \
1741 uint##size##_t src2 = b->element[i]; \
1742 uint##size##_t src3 = r->element[i]; \
1743 uint##size##_t begin, end, shift, mask, rot_val; \
1744 \
1745 shift = extract##size(src2, 0, 6); \
1746 end = extract##size(src2, 8, 6); \
1747 begin = extract##size(src2, 16, 6); \
1748 rot_val = rol##size(src1, shift); \
1749 mask = mask_u##size(begin, end); \
1750 if (insert) { \
1751 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1752 } else { \
1753 r->element[i] = (rot_val & mask); \
1754 } \
1755 } \
1756 }
1757
1758 VRLMI(vrldmi, 64, u64, 1);
1759 VRLMI(vrlwmi, 32, u32, 1);
1760 VRLMI(vrldnm, 64, u64, 0);
1761 VRLMI(vrlwnm, 32, u32, 0);
1762
1763 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1764 ppc_avr_t *c)
1765 {
1766 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1767 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1768 }
1769
1770 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1771 {
1772 int i;
1773
1774 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1775 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1776 }
1777 }
1778
1779 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1780 {
1781 int i;
1782
1783 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1784 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1785 }
1786 }
1787
1788 /* The specification says that the results are undefined if all of the
1789 * shift counts are not identical.  We check that they are identical, to
1790 * conform to what real hardware appears to do. */
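/* Illustrative sketch (not from the source): for vsl with a common shift
 * count of 3, the high doubleword becomes (hi << 3) | (lo >> 61) and the
 * low doubleword becomes lo << 3; vsr mirrors this with the carried bits
 * flowing the other way.
 */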
1791 #define VSHIFT(suffix, leftp) \
1792 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1793 { \
1794 int shift = b->u8[LO_IDX*15] & 0x7; \
1795 int doit = 1; \
1796 int i; \
1797 \
1798 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1799 doit = doit && ((b->u8[i] & 0x7) == shift); \
1800 } \
1801 if (doit) { \
1802 if (shift == 0) { \
1803 *r = *a; \
1804 } else if (leftp) { \
1805 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1806 \
1807 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1808 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1809 } else { \
1810 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1811 \
1812 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1813 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1814 } \
1815 } \
1816 }
1817 VSHIFT(l, 1)
1818 VSHIFT(r, 0)
1819 #undef VSHIFT
1820
1821 #define VSL(suffix, element, mask) \
1822 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1823 { \
1824 int i; \
1825 \
1826 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1827 unsigned int shift = b->element[i] & mask; \
1828 \
1829 r->element[i] = a->element[i] << shift; \
1830 } \
1831 }
1832 VSL(b, u8, 0x7)
1833 VSL(h, u16, 0x0F)
1834 VSL(w, u32, 0x1F)
1835 VSL(d, u64, 0x3F)
1836 #undef VSL
1837
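/* vslv shifts each byte left by the count held in the corresponding byte of
 * b, pulling the vacated low-order bits from the next byte of a.
 * Illustrative example: a->u8[i] = 0x01, a->u8[i + 1] = 0x80, shift 1 gives
 * r->u8[i] = 0x03.
 */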
1838 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1839 {
1840 int i;
1841 unsigned int shift, bytes, size;
1842
1843 size = ARRAY_SIZE(r->u8);
1844 for (i = 0; i < size; i++) {
1845 shift = b->u8[i] & 0x7; /* extract shift value */
1846 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1847 (((i + 1) < size) ? a->u8[i + 1] : 0);
1848 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1849 }
1850 }
1851
1852 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1853 {
1854 int i;
1855 unsigned int shift, bytes;
1856
1857 /* Use reverse order, as the destination and source registers can be the
1858 * same.  The register is modified in place, saving a temporary; iterating
1859 * in reverse order guarantees that a computed result is not fed back.
1860 */
1861 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1862 shift = b->u8[i] & 0x7; /* extract shift value */
1863 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1864 /* extract adjacent bytes */
1865 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1866 }
1867 }
1868
1869 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1870 {
1871 int sh = shift & 0xf;
1872 int i;
1873 ppc_avr_t result;
1874
1875 #if defined(HOST_WORDS_BIGENDIAN)
1876 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1877 int index = sh + i;
1878 if (index > 0xf) {
1879 result.u8[i] = b->u8[index - 0x10];
1880 } else {
1881 result.u8[i] = a->u8[index];
1882 }
1883 }
1884 #else
1885 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1886 int index = (16 - sh) + i;
1887 if (index > 0xf) {
1888 result.u8[i] = a->u8[index - 0x10];
1889 } else {
1890 result.u8[i] = b->u8[index];
1891 }
1892 }
1893 #endif
1894 *r = result;
1895 }
1896
1897 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1898 {
1899 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1900
1901 #if defined(HOST_WORDS_BIGENDIAN)
1902 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1903 memset(&r->u8[16-sh], 0, sh);
1904 #else
1905 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1906 memset(&r->u8[0], 0, sh);
1907 #endif
1908 }
1909
1910 /* Experimental testing shows that hardware masks the immediate. */
1911 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1912 #if defined(HOST_WORDS_BIGENDIAN)
1913 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1914 #else
1915 #define SPLAT_ELEMENT(element) \
1916 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1917 #endif
1918 #define VSPLT(suffix, element) \
1919 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1920 { \
1921 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1922 int i; \
1923 \
1924 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1925 r->element[i] = s; \
1926 } \
1927 }
1928 VSPLT(b, u8)
1929 VSPLT(h, u16)
1930 VSPLT(w, u32)
1931 #undef VSPLT
1932 #undef SPLAT_ELEMENT
1933 #undef _SPLAT_MASKED
1934 #if defined(HOST_WORDS_BIGENDIAN)
1935 #define VINSERT(suffix, element) \
1936 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1937 { \
1938 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \
1939 sizeof(r->element[0])); \
1940 }
1941 #else
1942 #define VINSERT(suffix, element) \
1943 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1944 { \
1945 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1946 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1947 }
1948 #endif
1949 VINSERT(b, u8)
1950 VINSERT(h, u16)
1951 VINSERT(w, u32)
1952 VINSERT(d, u64)
1953 #undef VINSERT
1954 #if defined(HOST_WORDS_BIGENDIAN)
1955 #define VEXTRACT(suffix, element) \
1956 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1957 { \
1958 uint32_t es = sizeof(r->element[0]); \
1959 memmove(&r->u8[8 - es], &b->u8[index], es); \
1960 memset(&r->u8[8], 0, 8); \
1961 memset(&r->u8[0], 0, 8 - es); \
1962 }
1963 #else
1964 #define VEXTRACT(suffix, element) \
1965 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1966 { \
1967 uint32_t es = sizeof(r->element[0]); \
1968 uint32_t s = (16 - index) - es; \
1969 memmove(&r->u8[8], &b->u8[s], es); \
1970 memset(&r->u8[0], 0, 8); \
1971 memset(&r->u8[8 + es], 0, 8 - es); \
1972 }
1973 #endif
1974 VEXTRACT(ub, u8)
1975 VEXTRACT(uh, u16)
1976 VEXTRACT(uw, u32)
1977 VEXTRACT(d, u64)
1978 #undef VEXTRACT
1979
1980 #define VEXT_SIGNED(name, element, mask, cast, recast) \
1981 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1982 { \
1983 int i; \
1984 VECTOR_FOR_INORDER_I(i, element) { \
1985 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
1986 } \
1987 }
1988 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
1989 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
1990 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
1991 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
1992 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
1993 #undef VEXT_SIGNED
1994
1995 #define VNEG(name, element) \
1996 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1997 { \
1998 int i; \
1999 VECTOR_FOR_INORDER_I(i, element) { \
2000 r->element[i] = -b->element[i]; \
2001 } \
2002 }
2003 VNEG(vnegw, s32)
2004 VNEG(vnegd, s64)
2005 #undef VNEG
2006
2007 #define VSPLTI(suffix, element, splat_type) \
2008 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2009 { \
2010 splat_type x = (int8_t)(splat << 3) >> 3; \
2011 int i; \
2012 \
2013 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2014 r->element[i] = x; \
2015 } \
2016 }
2017 VSPLTI(b, s8, int8_t)
2018 VSPLTI(h, s16, int16_t)
2019 VSPLTI(w, s32, int32_t)
2020 #undef VSPLTI
2021
2022 #define VSR(suffix, element, mask) \
2023 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2024 { \
2025 int i; \
2026 \
2027 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2028 unsigned int shift = b->element[i] & mask; \
2029 r->element[i] = a->element[i] >> shift; \
2030 } \
2031 }
2032 VSR(ab, s8, 0x7)
2033 VSR(ah, s16, 0xF)
2034 VSR(aw, s32, 0x1F)
2035 VSR(ad, s64, 0x3F)
2036 VSR(b, u8, 0x7)
2037 VSR(h, u16, 0xF)
2038 VSR(w, u32, 0x1F)
2039 VSR(d, u64, 0x3F)
2040 #undef VSR
2041
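/*
 * vsro: shift the whole 128-bit value of vA right by a byte count taken
 * from bits 3:6 of the low-order byte of vB, zero-filling from the left.
 */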
2042 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2043 {
2044 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2045
2046 #if defined(HOST_WORDS_BIGENDIAN)
2047 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2048 memset(&r->u8[0], 0, sh);
2049 #else
2050 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2051 memset(&r->u8[16 - sh], 0, sh);
2052 #endif
2053 }
2054
2055 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2056 {
2057 int i;
2058
2059 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2060 r->u32[i] = a->u32[i] >= b->u32[i];
2061 }
2062 }
2063
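/*
 * The vsum* helpers below accumulate partial sums in a 64-bit temporary,
 * then saturate each 32-bit result with cvtsdsw()/cvtuduw(), which also
 * records saturation so that VSCR[SAT] can be set afterwards.
 */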
2064 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2065 {
2066 int64_t t;
2067 int i, upper;
2068 ppc_avr_t result;
2069 int sat = 0;
2070
2071 #if defined(HOST_WORDS_BIGENDIAN)
2072 upper = ARRAY_SIZE(r->s32)-1;
2073 #else
2074 upper = 0;
2075 #endif
2076 t = (int64_t)b->s32[upper];
2077 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2078 t += a->s32[i];
2079 result.s32[i] = 0;
2080 }
2081 result.s32[upper] = cvtsdsw(t, &sat);
2082 *r = result;
2083
2084 if (sat) {
2085 env->vscr |= (1 << VSCR_SAT);
2086 }
2087 }
2088
2089 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2090 {
2091 int i, j, upper;
2092 ppc_avr_t result;
2093 int sat = 0;
2094
2095 #if defined(HOST_WORDS_BIGENDIAN)
2096 upper = 1;
2097 #else
2098 upper = 0;
2099 #endif
2100 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2101 int64_t t = (int64_t)b->s32[upper + i * 2];
2102
2103 result.u64[i] = 0;
2104 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2105 t += a->s32[2 * i + j];
2106 }
2107 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2108 }
2109
2110 *r = result;
2111 if (sat) {
2112 env->vscr |= (1 << VSCR_SAT);
2113 }
2114 }
2115
2116 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2117 {
2118 int i, j;
2119 int sat = 0;
2120
2121 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2122 int64_t t = (int64_t)b->s32[i];
2123
2124 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2125 t += a->s8[4 * i + j];
2126 }
2127 r->s32[i] = cvtsdsw(t, &sat);
2128 }
2129
2130 if (sat) {
2131 env->vscr |= (1 << VSCR_SAT);
2132 }
2133 }
2134
2135 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2136 {
2137 int sat = 0;
2138 int i;
2139
2140 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2141 int64_t t = (int64_t)b->s32[i];
2142
2143 t += a->s16[2 * i] + a->s16[2 * i + 1];
2144 r->s32[i] = cvtsdsw(t, &sat);
2145 }
2146
2147 if (sat) {
2148 env->vscr |= (1 << VSCR_SAT);
2149 }
2150 }
2151
2152 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2153 {
2154 int i, j;
2155 int sat = 0;
2156
2157 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2158 uint64_t t = (uint64_t)b->u32[i];
2159
2160 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2161 t += a->u8[4 * i + j];
2162 }
2163 r->u32[i] = cvtuduw(t, &sat);
2164 }
2165
2166 if (sat) {
2167 env->vscr |= (1 << VSCR_SAT);
2168 }
2169 }
2170
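/*
 * Unpack helpers: UPKHI/UPKLO adapt the vupkh*/vupkl* helpers to host byte
 * order so that the architecturally high or low half of the packed source
 * is the one that gets expanded.
 */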
2171 #if defined(HOST_WORDS_BIGENDIAN)
2172 #define UPKHI 1
2173 #define UPKLO 0
2174 #else
2175 #define UPKHI 0
2176 #define UPKLO 1
2177 #endif
2178 #define VUPKPX(suffix, hi) \
2179 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2180 { \
2181 int i; \
2182 ppc_avr_t result; \
2183 \
2184 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2185 uint16_t e = b->u16[hi ? i : i+4]; \
2186 uint8_t a = (e >> 15) ? 0xff : 0; \
2187 uint8_t r = (e >> 10) & 0x1f; \
2188 uint8_t g = (e >> 5) & 0x1f; \
2189 uint8_t b = e & 0x1f; \
2190 \
2191 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2192 } \
2193 *r = result; \
2194 }
2195 VUPKPX(lpx, UPKLO)
2196 VUPKPX(hpx, UPKHI)
2197 #undef VUPKPX
2198
2199 #define VUPK(suffix, unpacked, packee, hi) \
2200 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2201 { \
2202 int i; \
2203 ppc_avr_t result; \
2204 \
2205 if (hi) { \
2206 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2207 result.unpacked[i] = b->packee[i]; \
2208 } \
2209 } else { \
2210 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2211 i++) { \
2212 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2213 } \
2214 } \
2215 *r = result; \
2216 }
2217 VUPK(hsb, s16, s8, UPKHI)
2218 VUPK(hsh, s32, s16, UPKHI)
2219 VUPK(hsw, s64, s32, UPKHI)
2220 VUPK(lsb, s16, s8, UPKLO)
2221 VUPK(lsh, s32, s16, UPKLO)
2222 VUPK(lsw, s64, s32, UPKLO)
2223 #undef VUPK
2224 #undef UPKHI
2225 #undef UPKLO
2226
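/*
 * Per-element count-leading-zeros, count-trailing-zeros and population
 * count, built on the clz32/clz64, ctz32/ctz64 and ctpop* host utilities.
 */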
2227 #define VGENERIC_DO(name, element) \
2228 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2229 { \
2230 int i; \
2231 \
2232 VECTOR_FOR_INORDER_I(i, element) { \
2233 r->element[i] = name(b->element[i]); \
2234 } \
2235 }
2236
2237 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2238 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2239 #define clzw(v) clz32((v))
2240 #define clzd(v) clz64((v))
2241
2242 VGENERIC_DO(clzb, u8)
2243 VGENERIC_DO(clzh, u16)
2244 VGENERIC_DO(clzw, u32)
2245 VGENERIC_DO(clzd, u64)
2246
2247 #undef clzb
2248 #undef clzh
2249 #undef clzw
2250 #undef clzd
2251
2252 #define ctzb(v) ((v) ? ctz32(v) : 8)
2253 #define ctzh(v) ((v) ? ctz32(v) : 16)
2254 #define ctzw(v) ctz32((v))
2255 #define ctzd(v) ctz64((v))
2256
2257 VGENERIC_DO(ctzb, u8)
2258 VGENERIC_DO(ctzh, u16)
2259 VGENERIC_DO(ctzw, u32)
2260 VGENERIC_DO(ctzd, u64)
2261
2262 #undef ctzb
2263 #undef ctzh
2264 #undef ctzw
2265 #undef ctzd
2266
2267 #define popcntb(v) ctpop8(v)
2268 #define popcnth(v) ctpop16(v)
2269 #define popcntw(v) ctpop32(v)
2270 #define popcntd(v) ctpop64(v)
2271
2272 VGENERIC_DO(popcntb, u8)
2273 VGENERIC_DO(popcnth, u16)
2274 VGENERIC_DO(popcntw, u32)
2275 VGENERIC_DO(popcntd, u64)
2276
2277 #undef popcntb
2278 #undef popcnth
2279 #undef popcntw
2280 #undef popcntd
2281
2282 #undef VGENERIC_DO
2283
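/*
 * 128-bit (quadword) add/subtract helpers.  When the compiler provides
 * __int128 (CONFIG_INT128) it is used directly; otherwise the avr_qw_*
 * routines below emulate the arithmetic on two 64-bit halves.
 */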
2284 #if defined(HOST_WORDS_BIGENDIAN)
2285 #define QW_ONE { .u64 = { 0, 1 } }
2286 #else
2287 #define QW_ONE { .u64 = { 1, 0 } }
2288 #endif
2289
2290 #ifndef CONFIG_INT128
2291
2292 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2293 {
2294 t->u64[0] = ~a.u64[0];
2295 t->u64[1] = ~a.u64[1];
2296 }
2297
2298 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2299 {
2300 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2301 return -1;
2302 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2303 return 1;
2304 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2305 return -1;
2306 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2307 return 1;
2308 } else {
2309 return 0;
2310 }
2311 }
2312
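/*
 * The carry out of the low 64-bit half is 1 exactly when a.lo + b.lo
 * wraps, i.e. when ~a.lo < b.lo.
 */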
2313 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2314 {
2315 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2316 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2317 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2318 }
2319
2320 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2321 {
2322 ppc_avr_t not_a;
2323 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2324 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2325 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2326 avr_qw_not(&not_a, a);
2327 return avr_qw_cmpu(not_a, b) < 0;
2328 }
2329
2330 #endif
2331
2332 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2333 {
2334 #ifdef CONFIG_INT128
2335 r->u128 = a->u128 + b->u128;
2336 #else
2337 avr_qw_add(r, *a, *b);
2338 #endif
2339 }
2340
2341 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2342 {
2343 #ifdef CONFIG_INT128
2344 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2345 #else
2346
2347 if (c->u64[LO_IDX] & 1) {
2348 ppc_avr_t tmp;
2349
2350 tmp.u64[HI_IDX] = 0;
2351 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2352 avr_qw_add(&tmp, *a, tmp);
2353 avr_qw_add(r, tmp, *b);
2354 } else {
2355 avr_qw_add(r, *a, *b);
2356 }
2357 #endif
2358 }
2359
2360 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2361 {
2362 #ifdef CONFIG_INT128
2363 r->u128 = (~a->u128 < b->u128);
2364 #else
2365 ppc_avr_t not_a;
2366
2367 avr_qw_not(&not_a, *a);
2368
2369 r->u64[HI_IDX] = 0;
2370 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2371 #endif
2372 }
2373
2374 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2375 {
2376 #ifdef CONFIG_INT128
2377 int carry_out = (~a->u128 < b->u128);
2378 if (!carry_out && (c->u128 & 1)) {
2379 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2380 ((a->u128 != 0) || (b->u128 != 0));
2381 }
2382 r->u128 = carry_out;
2383 #else
2384
2385 int carry_in = c->u64[LO_IDX] & 1;
2386 int carry_out = 0;
2387 ppc_avr_t tmp;
2388
2389 carry_out = avr_qw_addc(&tmp, *a, *b);
2390
2391 if (!carry_out && carry_in) {
2392 ppc_avr_t one = QW_ONE;
2393 carry_out = avr_qw_addc(&tmp, tmp, one);
2394 }
2395 r->u64[HI_IDX] = 0;
2396 r->u64[LO_IDX] = carry_out;
2397 #endif
2398 }
2399
2400 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2401 {
2402 #ifdef CONFIG_INT128
2403 r->u128 = a->u128 - b->u128;
2404 #else
2405 ppc_avr_t tmp;
2406 ppc_avr_t one = QW_ONE;
2407
2408 avr_qw_not(&tmp, *b);
2409 avr_qw_add(&tmp, *a, tmp);
2410 avr_qw_add(r, tmp, one);
2411 #endif
2412 }
2413
2414 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2415 {
2416 #ifdef CONFIG_INT128
2417 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2418 #else
2419 ppc_avr_t tmp, sum;
2420
2421 avr_qw_not(&tmp, *b);
2422 avr_qw_add(&sum, *a, tmp);
2423
2424 tmp.u64[HI_IDX] = 0;
2425 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2426 avr_qw_add(r, sum, tmp);
2427 #endif
2428 }
2429
2430 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2431 {
2432 #ifdef CONFIG_INT128
2433 r->u128 = (~a->u128 < ~b->u128) ||
2434 (a->u128 + ~b->u128 == (__uint128_t)-1);
2435 #else
2436 int carry = (avr_qw_cmpu(*a, *b) > 0);
2437 if (!carry) {
2438 ppc_avr_t tmp;
2439 avr_qw_not(&tmp, *b);
2440 avr_qw_add(&tmp, *a, tmp);
2441 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2442 }
2443 r->u64[HI_IDX] = 0;
2444 r->u64[LO_IDX] = carry;
2445 #endif
2446 }
2447
2448 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2449 {
2450 #ifdef CONFIG_INT128
2451 r->u128 =
2452 (~a->u128 < ~b->u128) ||
2453 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2454 #else
2455 int carry_in = c->u64[LO_IDX] & 1;
2456 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2457 if (!carry_out && carry_in) {
2458 ppc_avr_t tmp;
2459 avr_qw_not(&tmp, *b);
2460 avr_qw_add(&tmp, *a, tmp);
2461 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2462 }
2463
2464 r->u64[HI_IDX] = 0;
2465 r->u64[LO_IDX] = carry_out;
2466 #endif
2467 }
2468
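/*
 * Packed-decimal (BCD) helpers.  Operands hold 31 decimal digits, one
 * nibble each, with the sign code in the low-order nibble (digit 0).
 */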
2469 #define BCD_PLUS_PREF_1 0xC
2470 #define BCD_PLUS_PREF_2 0xF
2471 #define BCD_PLUS_ALT_1 0xA
2472 #define BCD_NEG_PREF 0xD
2473 #define BCD_NEG_ALT 0xB
2474 #define BCD_PLUS_ALT_2 0xE
2475 #define NATIONAL_PLUS 0x2B
2476 #define NATIONAL_NEG 0x2D
2477
2478 #if defined(HOST_WORDS_BIGENDIAN)
2479 #define BCD_DIG_BYTE(n) (15 - (n/2))
2480 #else
2481 #define BCD_DIG_BYTE(n) (n/2)
2482 #endif
2483
2484 static int bcd_get_sgn(ppc_avr_t *bcd)
2485 {
2486 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2487 case BCD_PLUS_PREF_1:
2488 case BCD_PLUS_PREF_2:
2489 case BCD_PLUS_ALT_1:
2490 case BCD_PLUS_ALT_2:
2491 {
2492 return 1;
2493 }
2494
2495 case BCD_NEG_PREF:
2496 case BCD_NEG_ALT:
2497 {
2498 return -1;
2499 }
2500
2501 default:
2502 {
2503 return 0;
2504 }
2505 }
2506 }
2507
2508 static int bcd_preferred_sgn(int sgn, int ps)
2509 {
2510 if (sgn >= 0) {
2511 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2512 } else {
2513 return BCD_NEG_PREF;
2514 }
2515 }
2516
2517 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2518 {
2519 uint8_t result;
2520 if (n & 1) {
2521 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2522 } else {
2523 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2524 }
2525
2526 if (unlikely(result > 9)) {
2527 *invalid = true;
2528 }
2529 return result;
2530 }
2531
2532 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2533 {
2534 if (n & 1) {
2535 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2536 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2537 } else {
2538 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2539 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2540 }
2541 }
2542
2543 static int bcd_cmp_zero(ppc_avr_t *bcd)
2544 {
2545 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2546 return 1 << CRF_EQ;
2547 } else {
2548 return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
2549 }
2550 }
2551
2552 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2553 {
2554 #if defined(HOST_WORDS_BIGENDIAN)
2555 return reg->u16[7 - n];
2556 #else
2557 return reg->u16[n];
2558 #endif
2559 }
2560
2561 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2562 {
2563 #if defined(HOST_WORDS_BIGENDIAN)
2564 reg->u16[7 - n] = val;
2565 #else
2566 reg->u16[n] = val;
2567 #endif
2568 }
2569
2570 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2571 {
2572 int i;
2573 int invalid = 0;
2574 for (i = 31; i > 0; i--) {
2575 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2576 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2577 if (unlikely(invalid)) {
2578 return 0; /* doesn't matter */
2579 } else if (dig_a > dig_b) {
2580 return 1;
2581 } else if (dig_a < dig_b) {
2582 return -1;
2583 }
2584 }
2585
2586 return 0;
2587 }
2588
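/*
 * Add (or, below, subtract) the digit magnitudes only; the caller sets the
 * sign nibble.  Returns 1 if every result digit is zero, -1 on an invalid
 * digit, and reports the final carry/borrow through *overflow.
 */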
2589 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2590 int *overflow)
2591 {
2592 int carry = 0;
2593 int i;
2594 int is_zero = 1;
2595 for (i = 1; i <= 31; i++) {
2596 uint8_t digit = bcd_get_digit(a, i, invalid) +
2597 bcd_get_digit(b, i, invalid) + carry;
2598 is_zero &= (digit == 0);
2599 if (digit > 9) {
2600 carry = 1;
2601 digit -= 10;
2602 } else {
2603 carry = 0;
2604 }
2605
2606 bcd_put_digit(t, digit, i);
2607
2608 if (unlikely(*invalid)) {
2609 return -1;
2610 }
2611 }
2612
2613 *overflow = carry;
2614 return is_zero;
2615 }
2616
2617 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2618 int *overflow)
2619 {
2620 int carry = 0;
2621 int i;
2622 int is_zero = 1;
2623 for (i = 1; i <= 31; i++) {
2624 uint8_t digit = bcd_get_digit(a, i, invalid) -
2625 bcd_get_digit(b, i, invalid) + carry;
2626 is_zero &= (digit == 0);
2627 if (digit & 0x80) {
2628 carry = -1;
2629 digit += 10;
2630 } else {
2631 carry = 0;
2632 }
2633
2634 bcd_put_digit(t, digit, i);
2635
2636 if (unlikely(*invalid)) {
2637 return -1;
2638 }
2639 }
2640
2641 *overflow = carry;
2642 return is_zero;
2643 }
2644
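/*
 * bcdadd/bcdsub return the CR field value for the operation: LT/GT give
 * the sign of the result, EQ marks a zero result, and SO flags an invalid
 * operand or a decimal overflow.
 */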
2645 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2646 {
2647
2648 int sgna = bcd_get_sgn(a);
2649 int sgnb = bcd_get_sgn(b);
2650 int invalid = (sgna == 0) || (sgnb == 0);
2651 int overflow = 0;
2652 int zero = 0;
2653 uint32_t cr = 0;
2654 ppc_avr_t result = { .u64 = { 0, 0 } };
2655
2656 if (!invalid) {
2657 if (sgna == sgnb) {
2658 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2659 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2660 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2661 } else if (bcd_cmp_mag(a, b) > 0) {
2662 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2663 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2664 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2665 } else {
2666 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2667 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2668 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2669 }
2670 }
2671
2672 if (unlikely(invalid)) {
2673 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2674 cr = 1 << CRF_SO;
2675 } else if (overflow) {
2676 cr |= 1 << CRF_SO;
2677 } else if (zero) {
2678 cr = 1 << CRF_EQ;
2679 }
2680
2681 *r = result;
2682
2683 return cr;
2684 }
2685
2686 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2687 {
2688 ppc_avr_t bcopy = *b;
2689 int sgnb = bcd_get_sgn(b);
2690 if (sgnb < 0) {
2691 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2692 } else if (sgnb > 0) {
2693 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2694 }
2695 /* else invalid ... defer to bcdadd code for proper handling */
2696
2697 return helper_bcdadd(r, a, &bcopy, ps);
2698 }
2699
2700 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2701 {
2702 int i;
2703 int cr = 0;
2704 uint16_t national = 0;
2705 uint16_t sgnb = get_national_digit(b, 0);
2706 ppc_avr_t ret = { .u64 = { 0, 0 } };
2707 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2708
2709 for (i = 1; i < 8; i++) {
2710 national = get_national_digit(b, i);
2711 if (unlikely(national < 0x30 || national > 0x39)) {
2712 invalid = 1;
2713 break;
2714 }
2715
2716 bcd_put_digit(&ret, national & 0xf, i);
2717 }
2718
2719 if (sgnb == NATIONAL_PLUS) {
2720 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2721 } else {
2722 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2723 }
2724
2725 cr = bcd_cmp_zero(&ret);
2726
2727 if (unlikely(invalid)) {
2728 cr = 1 << CRF_SO;
2729 }
2730
2731 *r = ret;
2732
2733 return cr;
2734 }
2735
2736 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2737 {
2738 int i;
2739 int cr = 0;
2740 int sgnb = bcd_get_sgn(b);
2741 int invalid = (sgnb == 0);
2742 ppc_avr_t ret = { .u64 = { 0, 0 } };
2743
2744 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2745
2746 for (i = 1; i < 8; i++) {
2747 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2748
2749 if (unlikely(invalid)) {
2750 break;
2751 }
2752 }
2753 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2754
2755 cr = bcd_cmp_zero(b);
2756
2757 if (ox_flag) {
2758 cr |= 1 << CRF_SO;
2759 }
2760
2761 if (unlikely(invalid)) {
2762 cr = 1 << CRF_SO;
2763 }
2764
2765 *r = ret;
2766
2767 return cr;
2768 }
2769
2770 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2771 {
2772 int i;
2773 int cr = 0;
2774 int invalid = 0;
2775 int zone_digit = 0;
2776 int zone_lead = ps ? 0xF : 0x3;
2777 int digit = 0;
2778 ppc_avr_t ret = { .u64 = { 0, 0 } };
2779 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2780
2781 if (unlikely((sgnb < 0xA) && ps)) {
2782 invalid = 1;
2783 }
2784
2785 for (i = 0; i < 16; i++) {
2786 zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2787 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2788 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2789 invalid = 1;
2790 break;
2791 }
2792
2793 bcd_put_digit(&ret, digit, i + 1);
2794 }
2795
2796 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2797 (!ps && (sgnb & 0x4))) {
2798 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2799 } else {
2800 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2801 }
2802
2803 cr = bcd_cmp_zero(&ret);
2804
2805 if (unlikely(invalid)) {
2806 cr = 1 << CRF_SO;
2807 }
2808
2809 *r = ret;
2810
2811 return cr;
2812 }
2813
2814 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2815 {
2816 int i;
2817 int cr = 0;
2818 uint8_t digit = 0;
2819 int sgnb = bcd_get_sgn(b);
2820 int zone_lead = (ps) ? 0xF0 : 0x30;
2821 int invalid = (sgnb == 0);
2822 ppc_avr_t ret = { .u64 = { 0, 0 } };
2823
2824 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2825
2826 for (i = 0; i < 16; i++) {
2827 digit = bcd_get_digit(b, i + 1, &invalid);
2828
2829 if (unlikely(invalid)) {
2830 break;
2831 }
2832
2833 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2834 }
2835
2836 if (ps) {
2837 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2838 } else {
2839 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2840 }
2841
2842 cr = bcd_cmp_zero(b);
2843
2844 if (ox_flag) {
2845 cr |= 1 << CRF_SO;
2846 }
2847
2848 if (unlikely(invalid)) {
2849 cr = 1 << CRF_SO;
2850 }
2851
2852 *r = ret;
2853
2854 return cr;
2855 }
2856
2857 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2858 {
2859 int i;
2860 VECTOR_FOR_INORDER_I(i, u8) {
2861 r->u8[i] = AES_sbox[a->u8[i]];
2862 }
2863 }
2864
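/*
 * vcipher: one AES encryption round (SubBytes, ShiftRows, MixColumns)
 * computed through the combined AES_Te* lookup tables, followed by
 * AddRoundKey with vB.
 */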
2865 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2866 {
2867 ppc_avr_t result;
2868 int i;
2869
2870 VECTOR_FOR_INORDER_I(i, u32) {
2871 result.AVRW(i) = b->AVRW(i) ^
2872 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2873 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2874 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2875 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2876 }
2877 *r = result;
2878 }
2879
2880 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2881 {
2882 ppc_avr_t result;
2883 int i;
2884
2885 VECTOR_FOR_INORDER_I(i, u8) {
2886 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2887 }
2888 *r = result;
2889 }
2890
2891 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2892 {
2893 /* This differs from what is written in ISA V2.07. The RTL is */
2894 /* incorrect and will be fixed in V2.07B. */
2895 int i;
2896 ppc_avr_t tmp;
2897
2898 VECTOR_FOR_INORDER_I(i, u8) {
2899 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2900 }
2901
2902 VECTOR_FOR_INORDER_I(i, u32) {
2903 r->AVRW(i) =
2904 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2905 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2906 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2907 AES_imc[tmp.AVRB(4*i + 3)][3];
2908 }
2909 }
2910
2911 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2912 {
2913 ppc_avr_t result;
2914 int i;
2915
2916 VECTOR_FOR_INORDER_I(i, u8) {
2917 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2918 }
2919 *r = result;
2920 }
2921
2922 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2923 #if defined(HOST_WORDS_BIGENDIAN)
2924 #define EL_IDX(i) (i)
2925 #else
2926 #define EL_IDX(i) (3 - (i))
2927 #endif
2928
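/*
 * vshasigmaw: SHA-256 sigma functions.  Bit 4 of st_six (st) selects the
 * upper-case Sigma variants; within the low nibble (six), the bit for each
 * word element selects sigma1/Sigma1 when set and sigma0/Sigma0 when clear.
 */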
2929 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2930 {
2931 int st = (st_six & 0x10) != 0;
2932 int six = st_six & 0xF;
2933 int i;
2934
2935 VECTOR_FOR_INORDER_I(i, u32) {
2936 if (st == 0) {
2937 if ((six & (0x8 >> i)) == 0) {
2938 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2939 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2940 (a->u32[EL_IDX(i)] >> 3);
2941 } else { /* six.bit[i] == 1 */
2942 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2943 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2944 (a->u32[EL_IDX(i)] >> 10);
2945 }
2946 } else { /* st == 1 */
2947 if ((six & (0x8 >> i)) == 0) {
2948 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2949 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2950 ROTRu32(a->u32[EL_IDX(i)], 22);
2951 } else { /* six.bit[i] == 1 */
2952 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2953 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2954 ROTRu32(a->u32[EL_IDX(i)], 25);
2955 }
2956 }
2957 }
2958 }
2959
2960 #undef ROTRu32
2961 #undef EL_IDX
2962
2963 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2964 #if defined(HOST_WORDS_BIGENDIAN)
2965 #define EL_IDX(i) (i)
2966 #else
2967 #define EL_IDX(i) (1 - (i))
2968 #endif
2969
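/*
 * vshasigmad: the SHA-512 equivalent, operating on the two 64-bit
 * doubleword elements with the SHA-512 rotation and shift amounts.
 */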
2970 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2971 {
2972 int st = (st_six & 0x10) != 0;
2973 int six = st_six & 0xF;
2974 int i;
2975
2976 VECTOR_FOR_INORDER_I(i, u64) {
2977 if (st == 0) {
2978 if ((six & (0x8 >> (2*i))) == 0) {
2979 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2980 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2981 (a->u64[EL_IDX(i)] >> 7);
2982 } else { /* six.bit[2*i] == 1 */
2983 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2984 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2985 (a->u64[EL_IDX(i)] >> 6);
2986 }
2987 } else { /* st == 1 */
2988 if ((six & (0x8 >> (2*i))) == 0) {
2989 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2990 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2991 ROTRu64(a->u64[EL_IDX(i)], 39);
2992 } else { /* six.bit[2*i] == 1 */
2993 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2994 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2995 ROTRu64(a->u64[EL_IDX(i)], 41);
2996 }
2997 }
2998 }
2999 }
3000
3001 #undef ROTRu64
3002 #undef EL_IDX
3003
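/*
 * vpermxor: each result byte is the XOR of one byte of vA, selected by the
 * high nibble of the corresponding byte of vC, with one byte of vB,
 * selected by the low nibble.
 */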
3004 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3005 {
3006 ppc_avr_t result;
3007 int i;
3008
3009 VECTOR_FOR_INORDER_I(i, u8) {
3010 int indexA = c->u8[i] >> 4;
3011 int indexB = c->u8[i] & 0xF;
3012 #if defined(HOST_WORDS_BIGENDIAN)
3013 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3014 #else
3015 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3016 #endif
3017 }
3018 *r = result;
3019 }
3020
3021 #undef VECTOR_FOR_INORDER_I
3022 #undef HI_IDX
3023 #undef LO_IDX
3024
3025 /*****************************************************************************/
3026 /* SPE extension helpers */
3027 /* Use a table to make this quicker */
3028 static const uint8_t hbrev[16] = {
3029 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3030 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3031 };
3032
3033 static inline uint8_t byte_reverse(uint8_t val)
3034 {
3035 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3036 }
3037
3038 static inline uint32_t word_reverse(uint32_t val)
3039 {
3040 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3041 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3042 }
3043
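/*
 * brinc (SPE): bit-reversed increment.  arg2 supplies the index mask; the
 * masked index OR'd with the inverted mask is bit-reversed, incremented,
 * reversed back, and its mask bits are merged into arg1 to give the next
 * bit-reversed buffer index.
 */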
3044 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3045 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3046 {
3047 uint32_t a, b, d, mask;
3048
3049 mask = UINT32_MAX >> (32 - MASKBITS);
3050 a = arg1 & mask;
3051 b = arg2 & mask;
3052 d = word_reverse(1 + word_reverse(a | ~b));
3053 return (arg1 & ~mask) | (d & b);
3054 }
3055
3056 uint32_t helper_cntlsw32(uint32_t val)
3057 {
3058 if (val & 0x80000000) {
3059 return clz32(~val);
3060 } else {
3061 return clz32(val);
3062 }
3063 }
3064
3065 uint32_t helper_cntlzw32(uint32_t val)
3066 {
3067 return clz32(val);
3068 }
3069
3070 /* 440 specific */
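/*
 * dlmzb: scan the eight bytes formed by high:low from the most significant
 * byte down.  The returned count (also written to the low bits of XER) is
 * the 1-based position of the first zero byte, or 8 when none is found;
 * when Rc is set, CR0 records whether the zero byte was in the high word,
 * the low word, or absent.
 */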
3071 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3072 target_ulong low, uint32_t update_Rc)
3073 {
3074 target_ulong mask;
3075 int i;
3076
3077 i = 1;
3078 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3079 if ((high & mask) == 0) {
3080 if (update_Rc) {
3081 env->crf[0] = 0x4;
3082 }
3083 goto done;
3084 }
3085 i++;
3086 }
3087 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3088 if ((low & mask) == 0) {
3089 if (update_Rc) {
3090 env->crf[0] = 0x8;
3091 }
3092 goto done;
3093 }
3094 i++;
3095 }
3096 i = 8;
3097 if (update_Rc) {
3098 env->crf[0] = 0x2;
3099 }
3100 done:
3101 env->xer = (env->xer & ~0x7F) | i;
3102 if (update_Rc) {
3103 env->crf[0] |= xer_so;
3104 }
3105 return i;
3106 }