* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "qemu/osdep.h"
#include "cpu.h"
+#include "exec/exec-all.h"
#include "qemu/host-utils.h"
-#include "helper.h"
+#include "exec/helper-proto.h"
+#include "crypto/aes.h"
#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */
-#if defined(TARGET_PPC64)
-
-uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
-{
- int64_t th;
- uint64_t tl;
-
- muls64(&tl, (uint64_t *)&th, arg1, arg2);
- /* If th != 0 && th != -1, then we had an overflow */
- if (likely((uint64_t)(th + 1) <= 1)) {
- env->ov = 0;
- } else {
- env->so = env->ov = 1;
- }
- return (int64_t)tl;
-}
-#endif
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
uint32_t oe)
return clz32(t);
}
+target_ulong helper_cnttzw(target_ulong t)
+{
+ return ctz32(t);
+}
+
#if defined(TARGET_PPC64)
+/* if x = 0xab, returns 0xababababababababa */
+#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
+
+/* substract 1 from each byte, and with inverse, check if MSB is set at each
+ * byte.
+ * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
+ * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
+ */
+#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
+
+/* When you XOR the pattern and there is a match, that byte will be zero */
+#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
+
+uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
+{
+ return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
+}
+
+#undef pattern
+#undef haszero
+#undef hasvalue
+
target_ulong helper_cntlzd(target_ulong t)
{
return clz64(t);
}
+
+target_ulong helper_cnttzd(target_ulong t)
+{
+ return ctz64(t);
+}
#endif
#if defined(TARGET_PPC64)
if (likely((uint64_t)shift != 0)) {
shift &= 0x3f;
ret = (int64_t)value >> shift;
- if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
+ if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
env->ca = 0;
} else {
env->ca = 1;
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
+#define AVRB(i) u8[i]
+#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
+#define AVRB(i) u8[15-(i)]
+#define AVRW(i) u32[3-(i)]
#endif
#if defined(HOST_WORDS_BIGENDIAN)
#undef VAVG_DO
#undef VAVG
+#define VABSDU_DO(name, element) \
+void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+{ \
+ int i; \
+ \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ r->element[i] = (a->element[i] > b->element[i]) ? \
+ (a->element[i] - b->element[i]) : \
+ (b->element[i] - a->element[i]); \
+ } \
+}
+
+/* VABSDU - Vector absolute difference unsigned
+ * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
+ * element - element type to access from vector
+ */
+#define VABSDU(type, element) \
+ VABSDU_DO(absdu##type, element)
+VABSDU(b, u8)
+VABSDU(h, u16)
+VABSDU(w, u32)
+#undef VABSDU_DO
+#undef VABSDU
+
#define VCF(suffix, cvt, element) \
void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
ppc_avr_t *b, uint32_t uim) \
int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
if (le_rel == float_relation_unordered) {
r->u32[i] = 0xc0000000;
- /* ALL_IN does not need to be updated here. */
+ all_in = 1;
} else {
float32 bneg = float32_chs(b->f[i]);
int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
#undef VBPERMQ_INDEX
#undef VBPERMQ_DW
-uint64_t VGBBD_MASKS[256] = {
+static const uint64_t VGBBD_MASKS[256] = {
0x0000000000000000ull, /* 00 */
0x0000000000000080ull, /* 01 */
0x0000000000008000ull, /* 02 */
r->u64[1] = t[1];
}
+#define PMSUM(name, srcfld, trgfld, trgtyp) \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+{ \
+ int i, j; \
+ trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
+ \
+ VECTOR_FOR_INORDER_I(i, srcfld) { \
+ prod[i] = 0; \
+ for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
+ if (a->srcfld[i] & (1ull<<j)) { \
+ prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
+ } \
+ } \
+ } \
+ \
+ VECTOR_FOR_INORDER_I(i, trgfld) { \
+ r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
+ } \
+}
+
+PMSUM(vpmsumb, u8, u16, uint16_t)
+PMSUM(vpmsumh, u16, u32, uint32_t)
+PMSUM(vpmsumw, u32, u64, uint64_t)
+
+void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+
+#ifdef CONFIG_INT128
+ int i, j;
+ __uint128_t prod[2];
+
+ VECTOR_FOR_INORDER_I(i, u64) {
+ prod[i] = 0;
+ for (j = 0; j < 64; j++) {
+ if (a->u64[i] & (1ull<<j)) {
+ prod[i] ^= (((__uint128_t)b->u64[i]) << j);
+ }
+ }
+ }
+
+ r->u128 = prod[0] ^ prod[1];
+
+#else
+ int i, j;
+ ppc_avr_t prod[2];
+
+ VECTOR_FOR_INORDER_I(i, u64) {
+ prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
+ for (j = 0; j < 64; j++) {
+ if (a->u64[i] & (1ull<<j)) {
+ ppc_avr_t bshift;
+ if (j == 0) {
+ bshift.u64[HI_IDX] = 0;
+ bshift.u64[LO_IDX] = b->u64[i];
+ } else {
+ bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
+ bshift.u64[LO_IDX] = b->u64[i] << j;
+ }
+ prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
+ prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
+ }
+ }
+ }
+
+ r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
+ r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
+#endif
+}
+
+
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
}
}
-#if defined(HOST_WORDS_BIGENDIAN)
-#define LEFT 0
-#define RIGHT 1
-#else
-#define LEFT 1
-#define RIGHT 0
-#endif
/* The specification says that the results are undefined if all of the
* shift counts are not identical. We check to make sure that they are
* to conform to what real hardware appears to do. */
} \
} \
}
-VSHIFT(l, LEFT)
-VSHIFT(r, RIGHT)
+VSHIFT(l, 1)
+VSHIFT(r, 0)
#undef VSHIFT
-#undef LEFT
-#undef RIGHT
#define VSL(suffix, element, mask) \
void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
#endif
}
+#define BCD_PLUS_PREF_1 0xC
+#define BCD_PLUS_PREF_2 0xF
+#define BCD_PLUS_ALT_1 0xA
+#define BCD_NEG_PREF 0xD
+#define BCD_NEG_ALT 0xB
+#define BCD_PLUS_ALT_2 0xE
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define BCD_DIG_BYTE(n) (15 - (n/2))
+#else
+#define BCD_DIG_BYTE(n) (n/2)
+#endif
+
+static int bcd_get_sgn(ppc_avr_t *bcd)
+{
+ switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
+ case BCD_PLUS_PREF_1:
+ case BCD_PLUS_PREF_2:
+ case BCD_PLUS_ALT_1:
+ case BCD_PLUS_ALT_2:
+ {
+ return 1;
+ }
+
+ case BCD_NEG_PREF:
+ case BCD_NEG_ALT:
+ {
+ return -1;
+ }
+
+ default:
+ {
+ return 0;
+ }
+ }
+}
+
+static int bcd_preferred_sgn(int sgn, int ps)
+{
+ if (sgn >= 0) {
+ return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
+ } else {
+ return BCD_NEG_PREF;
+ }
+}
+
+static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
+{
+ uint8_t result;
+ if (n & 1) {
+ result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
+ } else {
+ result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
+ }
+
+ if (unlikely(result > 9)) {
+ *invalid = true;
+ }
+ return result;
+}
+
+static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
+{
+ if (n & 1) {
+ bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
+ bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
+ } else {
+ bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
+ bcd->u8[BCD_DIG_BYTE(n)] |= digit;
+ }
+}
+
+static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
+{
+ int i;
+ int invalid = 0;
+ for (i = 31; i > 0; i--) {
+ uint8_t dig_a = bcd_get_digit(a, i, &invalid);
+ uint8_t dig_b = bcd_get_digit(b, i, &invalid);
+ if (unlikely(invalid)) {
+ return 0; /* doesn't matter */
+ } else if (dig_a > dig_b) {
+ return 1;
+ } else if (dig_a < dig_b) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
+ int *overflow)
+{
+ int carry = 0;
+ int i;
+ int is_zero = 1;
+ for (i = 1; i <= 31; i++) {
+ uint8_t digit = bcd_get_digit(a, i, invalid) +
+ bcd_get_digit(b, i, invalid) + carry;
+ is_zero &= (digit == 0);
+ if (digit > 9) {
+ carry = 1;
+ digit -= 10;
+ } else {
+ carry = 0;
+ }
+
+ bcd_put_digit(t, digit, i);
+
+ if (unlikely(*invalid)) {
+ return -1;
+ }
+ }
+
+ *overflow = carry;
+ return is_zero;
+}
+
+static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
+ int *overflow)
+{
+ int carry = 0;
+ int i;
+ int is_zero = 1;
+ for (i = 1; i <= 31; i++) {
+ uint8_t digit = bcd_get_digit(a, i, invalid) -
+ bcd_get_digit(b, i, invalid) + carry;
+ is_zero &= (digit == 0);
+ if (digit & 0x80) {
+ carry = -1;
+ digit += 10;
+ } else {
+ carry = 0;
+ }
+
+ bcd_put_digit(t, digit, i);
+
+ if (unlikely(*invalid)) {
+ return -1;
+ }
+ }
+
+ *overflow = carry;
+ return is_zero;
+}
+
+uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
+{
+
+ int sgna = bcd_get_sgn(a);
+ int sgnb = bcd_get_sgn(b);
+ int invalid = (sgna == 0) || (sgnb == 0);
+ int overflow = 0;
+ int zero = 0;
+ uint32_t cr = 0;
+ ppc_avr_t result = { .u64 = { 0, 0 } };
+
+ if (!invalid) {
+ if (sgna == sgnb) {
+ result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
+ zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
+ cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+ } else if (bcd_cmp_mag(a, b) > 0) {
+ result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
+ zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
+ cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+ } else {
+ result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
+ zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
+ cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+ }
+ }
+
+ if (unlikely(invalid)) {
+ result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
+ cr = 1 << CRF_SO;
+ } else if (overflow) {
+ cr |= 1 << CRF_SO;
+ } else if (zero) {
+ cr = 1 << CRF_EQ;
+ }
+
+ *r = result;
+
+ return cr;
+}
+
+uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
+{
+ ppc_avr_t bcopy = *b;
+ int sgnb = bcd_get_sgn(b);
+ if (sgnb < 0) {
+ bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
+ } else if (sgnb > 0) {
+ bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
+ }
+ /* else invalid ... defer to bcdadd code for proper handling */
+
+ return helper_bcdadd(r, a, &bcopy, ps);
+}
+
+void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
+{
+ int i;
+ VECTOR_FOR_INORDER_I(i, u8) {
+ r->u8[i] = AES_sbox[a->u8[i]];
+ }
+}
+
+void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+ ppc_avr_t result;
+ int i;
+
+ VECTOR_FOR_INORDER_I(i, u32) {
+ result.AVRW(i) = b->AVRW(i) ^
+ (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
+ AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
+ AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
+ AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
+ }
+ *r = result;
+}
+
+void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+ ppc_avr_t result;
+ int i;
+
+ VECTOR_FOR_INORDER_I(i, u8) {
+ result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
+ }
+ *r = result;
+}
+
+void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+ /* This differs from what is written in ISA V2.07. The RTL is */
+ /* incorrect and will be fixed in V2.07B. */
+ int i;
+ ppc_avr_t tmp;
+
+ VECTOR_FOR_INORDER_I(i, u8) {
+ tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
+ }
+
+ VECTOR_FOR_INORDER_I(i, u32) {
+ r->AVRW(i) =
+ AES_imc[tmp.AVRB(4*i + 0)][0] ^
+ AES_imc[tmp.AVRB(4*i + 1)][1] ^
+ AES_imc[tmp.AVRB(4*i + 2)][2] ^
+ AES_imc[tmp.AVRB(4*i + 3)][3];
+ }
+}
+
+void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+ ppc_avr_t result;
+ int i;
+
+ VECTOR_FOR_INORDER_I(i, u8) {
+ result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
+ }
+ *r = result;
+}
+
+#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
+#if defined(HOST_WORDS_BIGENDIAN)
+#define EL_IDX(i) (i)
+#else
+#define EL_IDX(i) (3 - (i))
+#endif
+
+void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
+{
+ int st = (st_six & 0x10) != 0;
+ int six = st_six & 0xF;
+ int i;
+
+ VECTOR_FOR_INORDER_I(i, u32) {
+ if (st == 0) {
+ if ((six & (0x8 >> i)) == 0) {
+ r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
+ ROTRu32(a->u32[EL_IDX(i)], 18) ^
+ (a->u32[EL_IDX(i)] >> 3);
+ } else { /* six.bit[i] == 1 */
+ r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
+ ROTRu32(a->u32[EL_IDX(i)], 19) ^
+ (a->u32[EL_IDX(i)] >> 10);
+ }
+ } else { /* st == 1 */
+ if ((six & (0x8 >> i)) == 0) {
+ r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
+ ROTRu32(a->u32[EL_IDX(i)], 13) ^
+ ROTRu32(a->u32[EL_IDX(i)], 22);
+ } else { /* six.bit[i] == 1 */
+ r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
+ ROTRu32(a->u32[EL_IDX(i)], 11) ^
+ ROTRu32(a->u32[EL_IDX(i)], 25);
+ }
+ }
+ }
+}
+
+#undef ROTRu32
+#undef EL_IDX
+
+#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
+#if defined(HOST_WORDS_BIGENDIAN)
+#define EL_IDX(i) (i)
+#else
+#define EL_IDX(i) (1 - (i))
+#endif
+
+void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
+{
+ int st = (st_six & 0x10) != 0;
+ int six = st_six & 0xF;
+ int i;
+
+ VECTOR_FOR_INORDER_I(i, u64) {
+ if (st == 0) {
+ if ((six & (0x8 >> (2*i))) == 0) {
+ r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
+ ROTRu64(a->u64[EL_IDX(i)], 8) ^
+ (a->u64[EL_IDX(i)] >> 7);
+ } else { /* six.bit[2*i] == 1 */
+ r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
+ ROTRu64(a->u64[EL_IDX(i)], 61) ^
+ (a->u64[EL_IDX(i)] >> 6);
+ }
+ } else { /* st == 1 */
+ if ((six & (0x8 >> (2*i))) == 0) {
+ r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
+ ROTRu64(a->u64[EL_IDX(i)], 34) ^
+ ROTRu64(a->u64[EL_IDX(i)], 39);
+ } else { /* six.bit[2*i] == 1 */
+ r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
+ ROTRu64(a->u64[EL_IDX(i)], 18) ^
+ ROTRu64(a->u64[EL_IDX(i)], 41);
+ }
+ }
+ }
+}
+
+#undef ROTRu64
+#undef EL_IDX
+
+void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+{
+ ppc_avr_t result;
+ int i;
+
+ VECTOR_FOR_INORDER_I(i, u8) {
+ int indexA = c->u8[i] >> 4;
+ int indexB = c->u8[i] & 0xF;
+#if defined(HOST_WORDS_BIGENDIAN)
+ result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
+#else
+ result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
+#endif
+ }
+ *r = result;
+}
#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
}
i++;
}
+ i = 8;
if (update_Rc) {
env->crf[0] = 0x2;
}