#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
+#include "crypto/aes-round.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
+#include "tcg/tcg-gvec-desc.h"
#include "helper_regs.h"
/*****************************************************************************/
static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
if (unlikely(ov)) {
- env->so = env->ov = 1;
+ env->so = env->ov = env->ov32 = 1;
} else {
- env->ov = 0;
+ env->ov = env->ov32 = 0;
}
}
/*****************************************************************************/
/* Altivec extension helpers */
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element) \
for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
env->vscr_sat.u32[0] = 1;
}
-void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
- r->u32[i] = ~a->u32[i] < b->u32[i];
- }
-}
-
-/* vprtybw */
-void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
-{
- int i;
- for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
- uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
- res ^= res >> 8;
- r->u32[i] = res & 1;
- }
-}
-
-/* vprtybd */
-void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
-{
- int i;
- for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
- uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
- res ^= res >> 16;
- res ^= res >> 8;
- r->u64[i] = res & 1;
- }
-}
-
/* vprtybq */
-void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
+void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
{
uint64_t res = b->u64[0] ^ b->u64[1];
res ^= res >> 32;
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
-#define VAVG_DO(name, element, etype) \
- void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
- { \
- int i; \
- \
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
- r->element[i] = x >> 1; \
- } \
- }
-
-#define VAVG(type, signed_element, signed_type, unsigned_element, \
- unsigned_type) \
- VAVG_DO(avgs##type, signed_element, signed_type) \
- VAVG_DO(avgu##type, unsigned_element, unsigned_type)
-VAVG(b, s8, int16_t, u8, uint16_t)
-VAVG(h, s16, int32_t, u16, uint32_t)
-VAVG(w, s32, int64_t, u32, uint64_t)
-#undef VAVG_DO
+#define VAVG(name, element, etype) \
+ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
+ { \
+ int i; \
+ \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
+ r->element[i] = x >> 1; \
+ } \
+ }
+
+VAVG(VAVGSB, s8, int16_t)
+VAVG(VAVGUB, u8, uint16_t)
+VAVG(VAVGSH, s16, int32_t)
+VAVG(VAVGUH, u16, uint32_t)
+VAVG(VAVGSW, s32, int64_t)
+VAVG(VAVGUW, u32, uint64_t)
#undef VAVG
-#define VABSDU_DO(name, element) \
-void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+#define VABSDU(name, element) \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
{ \
int i; \
\
* name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
* element - element type to access from vector
*/
-#define VABSDU(type, element) \
- VABSDU_DO(absdu##type, element)
-VABSDU(b, u8)
-VABSDU(h, u16)
-VABSDU(w, u32)
-#undef VABSDU_DO
+VABSDU(VABSDUB, u8)
+VABSDU(VABSDUH, u16)
+VABSDU(VABSDUW, u32)
#undef VABSDU
#define VCF(suffix, cvt, element) \
VCF(sx, int32_to_float32, s32)
#undef VCF
-#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
-void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
- ppc_avr_t *a, ppc_avr_t *b) \
-{ \
- etype ones = (etype)-1; \
- etype all = ones; \
- etype result, none = 0; \
- int i; \
- \
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- if (cmpzero) { \
- result = ((a->element[i] == 0) \
- || (b->element[i] == 0) \
- || (a->element[i] != b->element[i]) ? \
- ones : 0x0); \
- } else { \
- result = (a->element[i] != b->element[i]) ? ones : 0x0; \
- } \
- r->element[i] = result; \
- all &= result; \
- none |= result; \
- } \
- if (record) { \
- env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
- } \
+#define VCMPNEZ(NAME, ELEM) \
+void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
+{ \
+ for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
+ t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
+ (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
+ } \
}
-
-/*
- * VCMPNEZ - Vector compare not equal to zero
- * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
- * element - element type to access from vector
- */
-#define VCMPNE(suffix, element, etype, cmpzero) \
- VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
- VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
-VCMPNE(zb, u8, uint8_t, 1)
-VCMPNE(zh, u16, uint16_t, 1)
-VCMPNE(zw, u32, uint32_t, 1)
-#undef VCMPNE_DO
-#undef VCMPNE
+VCMPNEZ(VCMPNEZB, u8)
+VCMPNEZ(VCMPNEZH, u16)
+VCMPNEZ(VCMPNEZW, u32)
+#undef VCMPNEZ
#define VCMPFP_DO(suffix, compare, order, record) \
void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
VCT(sxs, cvtsdsw, s32)
#undef VCT
+typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
+
+static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
+{
+ int64_t psum = 0;
+ for (int i = 0; i < 8; i++, mask >>= 1) {
+ if (mask & 1) {
+ psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
+ }
+ }
+ return psum;
+}
+
+static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
+{
+ int64_t psum = 0;
+ for (int i = 0; i < 4; i++, mask >>= 1) {
+ if (mask & 1) {
+ psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
+ }
+ }
+ return psum;
+}
+
+static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
+{
+ int64_t psum = 0;
+ for (int i = 0; i < 2; i++, mask >>= 1) {
+ if (mask & 1) {
+ psum += (int64_t)sextract32(a, 16 * i, 16) *
+ sextract32(b, 16 * i, 16);
+ }
+ }
+ return psum;
+}
+
+static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
+ uint32_t mask, bool sat, bool acc, do_ger ger)
+{
+ uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
+ xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
+ ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
+ uint8_t xmsk_bit, ymsk_bit;
+ int64_t psum;
+ int i, j;
+ for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
+ for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
+ if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
+ psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
+ if (acc) {
+ psum += at[i].VsrSW(j);
+ }
+ if (sat && psum > INT32_MAX) {
+ set_vscr_sat(env);
+ at[i].VsrSW(j) = INT32_MAX;
+ } else if (sat && psum < INT32_MIN) {
+ set_vscr_sat(env);
+ at[i].VsrSW(j) = INT32_MIN;
+ } else {
+ at[i].VsrSW(j) = (int32_t) psum;
+ }
+ } else {
+ at[i].VsrSW(j) = 0;
+ }
+ }
+ }
+}
+
+QEMU_FLATTEN
+void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, false, ger_rank8);
+}
+
+QEMU_FLATTEN
+void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, true, ger_rank8);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, false, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, true, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, true, true, ger_rank4);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, false, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, true, false, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, false, true, ger_rank2);
+}
+
+QEMU_FLATTEN
+void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
+ ppc_acc_t *at, uint32_t mask)
+{
+ xviger(env, a, b, at, mask, true, true, ger_rank2);
+}
+
target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
target_ulong count = 0;
return count;
}
-void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
{
int sat = 0;
}
}
-void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
{
int sat = 0;
}
}
-void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
+ uint32_t v)
{
int i;
#undef VMRG_DO
#undef VMRG
-void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
- ppc_avr_t *b, ppc_avr_t *c)
+void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
int32_t prod[16];
int i;
}
}
-void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
- ppc_avr_t *b, ppc_avr_t *c)
+void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
int32_t prod[8];
int i;
}
}
-void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
{
int32_t prod[8];
}
}
-void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
- ppc_avr_t *b, ppc_avr_t *c)
+void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
uint16_t prod[16];
int i;
}
}
-void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
- ppc_avr_t *b, ppc_avr_t *c)
+void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
uint32_t prod[8];
int i;
}
}
-void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
+void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
{
uint32_t prod[8];
#undef VMUL_DO_ODD
#undef VMUL
-void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
- ppc_avr_t *c)
+void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
+ target_ulong uim)
+{
+ int i, idx;
+ ppc_vsr_t tmp = { .u64 = {0, 0} };
+
+ for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
+ if ((pcv->VsrB(i) >> 5) == uim) {
+ idx = pcv->VsrB(i) & 0x1f;
+ if (idx < ARRAY_SIZE(t->u8)) {
+ tmp.VsrB(i) = s0->VsrB(idx);
+ } else {
+ tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
+ }
+ }
+ }
+
+ *t = tmp;
+}
+
+void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 neg1 = int128_makes64(-1);
+ Int128 int128_min = int128_make128(0, INT64_MIN);
+ if (likely(int128_nz(b->s128) &&
+ (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
+ t->s128 = int128_divs(a->s128, b->s128);
+ } else {
+ t->s128 = a->s128; /* Undefined behavior */
+ }
+}
+
+void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ if (int128_nz(b->s128)) {
+ t->s128 = int128_divu(a->s128, b->s128);
+ } else {
+ t->s128 = a->s128; /* Undefined behavior */
+ }
+}
+
+void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ int i;
+ int64_t high;
+ uint64_t low;
+ for (i = 0; i < 2; i++) {
+ high = a->s64[i];
+ low = 0;
+ if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
+ t->s64[i] = a->s64[i]; /* Undefined behavior */
+ } else {
+ divs128(&low, &high, b->s64[i]);
+ t->s64[i] = low;
+ }
+ }
+}
+
+void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ int i;
+ uint64_t high, low;
+ for (i = 0; i < 2; i++) {
+ high = a->u64[i];
+ low = 0;
+ if (unlikely(!b->u64[i])) {
+ t->u64[i] = a->u64[i]; /* Undefined behavior */
+ } else {
+ divu128(&low, &high, b->u64[i]);
+ t->u64[i] = low;
+ }
+ }
+}
+
+void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 high, low;
+ Int128 int128_min = int128_make128(0, INT64_MIN);
+ Int128 neg1 = int128_makes64(-1);
+
+ high = a->s128;
+ low = int128_zero();
+ if (unlikely(!int128_nz(b->s128) ||
+ (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
+ t->s128 = a->s128; /* Undefined behavior */
+ } else {
+ divs256(&low, &high, b->s128);
+ t->s128 = low;
+ }
+}
+
+void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 high, low;
+
+ high = a->s128;
+ low = int128_zero();
+ if (unlikely(!int128_nz(b->s128))) {
+ t->s128 = a->s128; /* Undefined behavior */
+ } else {
+ divu256(&low, &high, b->s128);
+ t->s128 = low;
+ }
+}
+
+void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ Int128 neg1 = int128_makes64(-1);
+ Int128 int128_min = int128_make128(0, INT64_MIN);
+ if (likely(int128_nz(b->s128) &&
+ (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
+ t->s128 = int128_rems(a->s128, b->s128);
+ } else {
+ t->s128 = int128_zero(); /* Undefined behavior */
+ }
+}
+
+void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+ if (likely(int128_nz(b->s128))) {
+ t->s128 = int128_remu(a->s128, b->s128);
+ } else {
+ t->s128 = int128_zero(); /* Undefined behavior */
+ }
+}
+
+void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
ppc_avr_t result;
int i;
*r = result;
}
-void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
- ppc_avr_t *c)
+void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
ppc_avr_t result;
int i;
*r = result;
}
-#if defined(HOST_WORDS_BIGENDIAN)
+#define XXGENPCV_BE_EXP(NAME, SZ) \
+void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
+{ \
+ ppc_vsr_t tmp; \
+ \
+ /* Initialize tmp with the result of an all-zeros mask */ \
+ tmp.VsrD(0) = 0x1011121314151617; \
+ tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
+ \
+ /* Iterate over the most significant byte of each element */ \
+ for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
+ if (b->VsrB(i) & 0x80) { \
+ /* Update each byte of the element */ \
+ for (int k = 0; k < SZ; k++) { \
+ tmp.VsrB(i + k) = j + k; \
+ } \
+ j += SZ; \
+ } \
+ } \
+ \
+ *t = tmp; \
+}
+
+#define XXGENPCV_BE_COMP(NAME, SZ) \
+void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
+{ \
+ ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
+ \
+ /* Iterate over the most significant byte of each element */ \
+ for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
+ if (b->VsrB(i) & 0x80) { \
+ /* Update each byte of the element */ \
+ for (int k = 0; k < SZ; k++) { \
+ tmp.VsrB(j + k) = i + k; \
+ } \
+ j += SZ; \
+ } \
+ } \
+ \
+ *t = tmp; \
+}
+
+#define XXGENPCV_LE_EXP(NAME, SZ) \
+void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
+{ \
+ ppc_vsr_t tmp; \
+ \
+ /* Initialize tmp with the result of an all-zeros mask */ \
+ tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
+ tmp.VsrD(1) = 0x1716151413121110; \
+ \
+ /* Iterate over the most significant byte of each element */ \
+ for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
+ /* Reverse indexing of "i" */ \
+ const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
+ if (b->VsrB(idx) & 0x80) { \
+ /* Update each byte of the element */ \
+ for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
+ tmp.VsrB(idx + rk) = j + k; \
+ } \
+ j += SZ; \
+ } \
+ } \
+ \
+ *t = tmp; \
+}
+
+#define XXGENPCV_LE_COMP(NAME, SZ) \
+void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
+{ \
+ ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
+ \
+ /* Iterate over the most significant byte of each element */ \
+ for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
+ if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
+ /* Update each byte of the element */ \
+ for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
+ /* Reverse indexing of "j" */ \
+ const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
+ tmp.VsrB(idx + rk) = i + k; \
+ } \
+ j += SZ; \
+ } \
+ } \
+ \
+ *t = tmp; \
+}
+
+#define XXGENPCV(NAME, SZ) \
+ XXGENPCV_BE_EXP(NAME, SZ) \
+ XXGENPCV_BE_COMP(NAME, SZ) \
+ XXGENPCV_LE_EXP(NAME, SZ) \
+ XXGENPCV_LE_COMP(NAME, SZ) \
+
+XXGENPCV(XXGENPCVBM, 1)
+XXGENPCV(XXGENPCVHM, 2)
+XXGENPCV(XXGENPCVWM, 4)
+XXGENPCV(XXGENPCVDM, 8)
+
+#undef XXGENPCV_BE_EXP
+#undef XXGENPCV_BE_COMP
+#undef XXGENPCV_LE_EXP
+#undef XXGENPCV_LE_COMP
+#undef XXGENPCV
+
+#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
-#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
-#define EXTRACT_BIT(avr, i, index) \
- (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif
+#define EXTRACT_BIT(avr, i, index) \
+ (extract64((avr)->VsrD(i), 63 - index, 1))
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)
-void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
-
-#ifdef CONFIG_INT128
int i, j;
- __uint128_t prod[2];
-
- VECTOR_FOR_INORDER_I(i, u64) {
- prod[i] = 0;
- for (j = 0; j < 64; j++) {
- if (a->u64[i] & (1ull << j)) {
- prod[i] ^= (((__uint128_t)b->u64[i]) << j);
- }
- }
- }
-
- r->u128 = prod[0] ^ prod[1];
-
-#else
- int i, j;
- ppc_avr_t prod[2];
-
- VECTOR_FOR_INORDER_I(i, u64) {
- prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
- for (j = 0; j < 64; j++) {
- if (a->u64[i] & (1ull << j)) {
- ppc_avr_t bshift;
- if (j == 0) {
- bshift.VsrD(0) = 0;
- bshift.VsrD(1) = b->u64[i];
- } else {
- bshift.VsrD(0) = b->u64[i] >> (64 - j);
- bshift.VsrD(1) = b->u64[i] << j;
- }
- prod[i].VsrD(1) ^= bshift.VsrD(1);
- prod[i].VsrD(0) ^= bshift.VsrD(0);
+ Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
+
+ for (j = 0; j < 64; j++) {
+ for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
+ if (a->VsrD(i) & (1ull << j)) {
+ tmp = int128_make64(b->VsrD(i));
+ tmp = int128_lshift(tmp, j);
+ prod[i] = int128_xor(prod[i], tmp);
}
}
}
- r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
- r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
-#endif
+ r->s128 = int128_xor(prod[0], prod[1]);
}
-
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
#define PKBIG 1
#else
#define PKBIG 0
{
int i, j;
ppc_avr_t result;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
const ppc_avr_t *x[2] = { a, b };
#else
const ppc_avr_t *x[2] = { b, a };
}
}
-#define VRLMI(name, size, element, insert) \
-void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
-{ \
- int i; \
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- uint##size##_t src1 = a->element[i]; \
- uint##size##_t src2 = b->element[i]; \
- uint##size##_t src3 = r->element[i]; \
- uint##size##_t begin, end, shift, mask, rot_val; \
- \
- shift = extract##size(src2, 0, 6); \
- end = extract##size(src2, 8, 6); \
- begin = extract##size(src2, 16, 6); \
- rot_val = rol##size(src1, shift); \
- mask = mask_u##size(begin, end); \
- if (insert) { \
- r->element[i] = (rot_val & mask) | (src3 & ~mask); \
- } else { \
- r->element[i] = (rot_val & mask); \
- } \
- } \
+#define VRLMI(name, size, element, insert) \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
+{ \
+ int i; \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ uint##size##_t src1 = a->element[i]; \
+ uint##size##_t src2 = b->element[i]; \
+ uint##size##_t src3 = r->element[i]; \
+ uint##size##_t begin, end, shift, mask, rot_val; \
+ \
+ shift = extract##size(src2, 0, 6); \
+ end = extract##size(src2, 8, 6); \
+ begin = extract##size(src2, 16, 6); \
+ rot_val = rol##size(src1, shift); \
+ mask = mask_u##size(begin, end); \
+ if (insert) { \
+ r->element[i] = (rot_val & mask) | (src3 & ~mask); \
+ } else { \
+ r->element[i] = (rot_val & mask); \
+ } \
+ } \
}
-VRLMI(vrldmi, 64, u64, 1);
-VRLMI(vrlwmi, 32, u32, 1);
-VRLMI(vrldnm, 64, u64, 0);
-VRLMI(vrlwnm, 32, u32, 0);
-
-void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
- ppc_avr_t *c)
-{
- r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
- r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
-}
+VRLMI(VRLDMI, 64, u64, 1);
+VRLMI(VRLWMI, 32, u32, 1);
+VRLMI(VRLDNM, 64, u64, 0);
+VRLMI(VRLWNM, 32, u32, 0);
void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
{
int sh = (b->VsrB(0xf) >> 3) & 0xf;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
memmove(&r->u8[0], &a->u8[sh], 16 - sh);
memset(&r->u8[16 - sh], 0, sh);
#else
#endif
}
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
#else
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
VINSX(D, uint64_t)
#undef ELEM_ADDR
#undef VINSX
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
target_ulong index) \
VEXTDVLX(VEXTDUWVLX, 4)
VEXTDVLX(VEXTDDVLX, 8)
#undef VEXTDVLX
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
#define VEXTRACT(suffix, element) \
void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
{ \
VEXTRACT(d, u64)
#undef VEXTRACT
-void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
- ppc_vsr_t *xb, uint32_t index)
+#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
+uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
+{ \
+ int i, idx, crf = 0; \
+ \
+ for (i = 0; i < NUM_ELEMS; i++) { \
+ idx = LEFT ? i : NUM_ELEMS - i - 1; \
+ if (b->Vsr##ELEM(idx)) { \
+ t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
+ } else { \
+ crf = 0b0010; \
+ break; \
+ } \
+ } \
+ \
+ for (; i < NUM_ELEMS; i++) { \
+ idx = LEFT ? i : NUM_ELEMS - i - 1; \
+ t->Vsr##ELEM(idx) = 0; \
+ } \
+ \
+ return crf; \
+}
+VSTRI(VSTRIBL, B, 16, true)
+VSTRI(VSTRIBR, B, 16, false)
+VSTRI(VSTRIHL, H, 8, true)
+VSTRI(VSTRIHR, H, 8, false)
+#undef VSTRI
+
+void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
ppc_vsr_t t = { };
size_t es = sizeof(uint32_t);
*xt = t;
}
-void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
- ppc_vsr_t *xb, uint32_t index)
+void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
ppc_vsr_t t = *xt;
size_t es = sizeof(uint32_t);
*xt = t;
}
+void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
+ uint32_t desc)
+{
+ /*
+ * Instead of processing imm bit-by-bit, we'll skip the computation of
+ * conjunctions whose corresponding bit is unset.
+ */
+ int bit, imm = simd_data(desc);
+ Int128 conj, disj = int128_zero();
+
+ /* Iterate over set bits from the least to the most significant bit */
+ while (imm) {
+ /*
+ * Get the next bit to be processed with ctz64. Invert the result of
+ * ctz64 to match the indexing used by PowerISA.
+ */
+ bit = 7 - ctzl(imm);
+ if (bit & 0x4) {
+ conj = a->s128;
+ } else {
+ conj = int128_not(a->s128);
+ }
+ if (bit & 0x2) {
+ conj = int128_and(conj, b->s128);
+ } else {
+ conj = int128_and(conj, int128_not(b->s128));
+ }
+ if (bit & 0x1) {
+ conj = int128_and(conj, c->s128);
+ } else {
+ conj = int128_and(conj, int128_not(c->s128));
+ }
+ disj = int128_or(disj, conj);
+
+ /* Unset the least significant bit that is set */
+ imm &= imm - 1;
+ }
+
+ t->s128 = disj;
+}
+
#define XXBLEND(name, sz) \
void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
ppc_avr_t *c, uint32_t desc) \
XXBLEND(D, 64)
#undef XXBLEND
-#define VNEG(name, element) \
-void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
-{ \
- int i; \
- for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
- r->element[i] = -b->element[i]; \
- } \
-}
-VNEG(vnegw, s32)
-VNEG(vnegd, s64)
-#undef VNEG
-
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
int sh = (b->VsrB(0xf) >> 3) & 0xf;
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
memmove(&r->u8[sh], &a->u8[0], 16 - sh);
memset(&r->u8[0], 0, sh);
#else
#endif
}
-void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
- r->u32[i] = a->u32[i] >= b->u32[i];
- }
-}
-
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
int64_t t;
}
}
-#if defined(HOST_WORDS_BIGENDIAN)
+#if HOST_BIG_ENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#undef VGENERIC_DO
-#if defined(HOST_WORDS_BIGENDIAN)
-#define QW_ONE { .u64 = { 0, 1 } }
-#else
-#define QW_ONE { .u64 = { 1, 0 } }
-#endif
-
-#ifndef CONFIG_INT128
-
-static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
-{
- t->u64[0] = ~a.u64[0];
- t->u64[1] = ~a.u64[1];
-}
-
-static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
-{
- if (a.VsrD(0) < b.VsrD(0)) {
- return -1;
- } else if (a.VsrD(0) > b.VsrD(0)) {
- return 1;
- } else if (a.VsrD(1) < b.VsrD(1)) {
- return -1;
- } else if (a.VsrD(1) > b.VsrD(1)) {
- return 1;
- } else {
- return 0;
- }
-}
-
-static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
-{
- t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
- t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
- (~a.VsrD(1) < b.VsrD(1));
-}
-
-static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
-{
- ppc_avr_t not_a;
- t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
- t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
- (~a.VsrD(1) < b.VsrD(1));
- avr_qw_not(¬_a, a);
- return avr_qw_cmpu(not_a, b) < 0;
-}
-
-#endif
-
-void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
-#ifdef CONFIG_INT128
- r->u128 = a->u128 + b->u128;
-#else
- avr_qw_add(r, *a, *b);
-#endif
+ r->s128 = int128_add(a->s128, b->s128);
}
-void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
-#ifdef CONFIG_INT128
- r->u128 = a->u128 + b->u128 + (c->u128 & 1);
-#else
-
- if (c->VsrD(1) & 1) {
- ppc_avr_t tmp;
-
- tmp.VsrD(0) = 0;
- tmp.VsrD(1) = c->VsrD(1) & 1;
- avr_qw_add(&tmp, *a, tmp);
- avr_qw_add(r, tmp, *b);
- } else {
- avr_qw_add(r, *a, *b);
- }
-#endif
+ r->s128 = int128_add(int128_add(a->s128, b->s128),
+ int128_make64(int128_getlo(c->s128) & 1));
}
-void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
-#ifdef CONFIG_INT128
- r->u128 = (~a->u128 < b->u128);
-#else
- ppc_avr_t not_a;
-
- avr_qw_not(¬_a, *a);
-
+ r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
r->VsrD(0) = 0;
- r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
-#endif
}
-void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
-#ifdef CONFIG_INT128
- int carry_out = (~a->u128 < b->u128);
- if (!carry_out && (c->u128 & 1)) {
- carry_out = ((a->u128 + b->u128 + 1) == 0) &&
- ((a->u128 != 0) || (b->u128 != 0));
- }
- r->u128 = carry_out;
-#else
-
- int carry_in = c->VsrD(1) & 1;
- int carry_out = 0;
- ppc_avr_t tmp;
-
- carry_out = avr_qw_addc(&tmp, *a, *b);
+ bool carry_out = int128_ult(int128_not(a->s128), b->s128),
+ carry_in = int128_getlo(c->s128) & 1;
if (!carry_out && carry_in) {
- ppc_avr_t one = QW_ONE;
- carry_out = avr_qw_addc(&tmp, tmp, one);
+ carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
+ int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
}
+
r->VsrD(0) = 0;
r->VsrD(1) = carry_out;
-#endif
}
-void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
-#ifdef CONFIG_INT128
- r->u128 = a->u128 - b->u128;
-#else
- ppc_avr_t tmp;
- ppc_avr_t one = QW_ONE;
-
- avr_qw_not(&tmp, *b);
- avr_qw_add(&tmp, *a, tmp);
- avr_qw_add(r, tmp, one);
-#endif
+ r->s128 = int128_sub(a->s128, b->s128);
}
-void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
-#ifdef CONFIG_INT128
- r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
-#else
- ppc_avr_t tmp, sum;
-
- avr_qw_not(&tmp, *b);
- avr_qw_add(&sum, *a, tmp);
-
- tmp.VsrD(0) = 0;
- tmp.VsrD(1) = c->VsrD(1) & 1;
- avr_qw_add(r, sum, tmp);
-#endif
+ r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
+ int128_make64(int128_getlo(c->s128) & 1));
}
-void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
-#ifdef CONFIG_INT128
- r->u128 = (~a->u128 < ~b->u128) ||
- (a->u128 + ~b->u128 == (__uint128_t)-1);
-#else
- int carry = (avr_qw_cmpu(*a, *b) > 0);
- if (!carry) {
- ppc_avr_t tmp;
- avr_qw_not(&tmp, *b);
- avr_qw_add(&tmp, *a, tmp);
- carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
- }
+ Int128 tmp = int128_not(b->s128);
+
+ r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
+ int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
r->VsrD(0) = 0;
- r->VsrD(1) = carry;
-#endif
}
-void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
-#ifdef CONFIG_INT128
- r->u128 =
- (~a->u128 < ~b->u128) ||
- ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
-#else
- int carry_in = c->VsrD(1) & 1;
- int carry_out = (avr_qw_cmpu(*a, *b) > 0);
- if (!carry_out && carry_in) {
- ppc_avr_t tmp;
- avr_qw_not(&tmp, *b);
- avr_qw_add(&tmp, *a, tmp);
- carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
- }
+ Int128 tmp = int128_not(b->s128);
+ bool carry_out = int128_ult(int128_not(a->s128), tmp),
+ carry_in = int128_getlo(c->s128) & 1;
+ r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
+ int128_makes64(-1)));
r->VsrD(0) = 0;
- r->VsrD(1) = carry_out;
-#endif
}
#define BCD_PLUS_PREF_1 0xC
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
- ppc_avr_t result;
- int i;
+ AESState *ad = (AESState *)r;
+ AESState *st = (AESState *)a;
+ AESState *rk = (AESState *)b;
- VECTOR_FOR_INORDER_I(i, u32) {
- result.VsrW(i) = b->VsrW(i) ^
- (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
- AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
- AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
- AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
- }
- *r = result;
+ aesenc_SB_SR_MC_AK(ad, st, rk, true);
}
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
- ppc_avr_t result;
- int i;
-
- VECTOR_FOR_INORDER_I(i, u8) {
- result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
- }
- *r = result;
+ aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
- ppc_avr_t result;
- int i;
-
- VECTOR_FOR_INORDER_I(i, u8) {
- result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
- }
- *r = result;
+ aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)