[mirror_qemu.git] / target-sparc / vis_helper.c

/*
 * VIS op helpers
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"

/*
 * Extract bits FROM..TO (inclusive) of a 64-bit value, counting bits from
 * the most significant end: bit 0 is 2^63 and bit 63 is 2^0.
 */
#define GET_FIELD(X, FROM, TO)                                          \
    (((X) >> (63 - (TO))) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * Extract bits FROM..TO (inclusive) using the numbering found in the
 * manuals, i.e. bit 0 is 2^0.  FROM is the low bit, TO the high bit.
 */
#define GET_FIELD_SP(X, FROM, TO)                                       \
    (((X) >> (FROM)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * ARRAY8: turn a packed three-field coordinate into a blocked byte offset.
 *
 * pixel_addr holds three integer fields, called X, Y and Z below (naming
 * follows the ARRAY8 description in the SPARC architecture manuals), whose
 * integer parts start at bits 11, 33 and 55 respectively.  Each field is
 * split into a lower, a middle and an upper part, and the parts are
 * interleaved in the result, most significant first:
 *
 *   upper Z | upper Y | upper X | middle Z | middle Y | middle X |
 *   lower Z | lower Y | lower X
 *
 * cubesize selects how many upper bits of X and of Y take part (n bits
 * each); the upper Z part is always four bits wide.
 *
 * Returns the assembled offset.
 */
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    const uint64_t addr = pixel_addr;
    /*
     * Only cube sizes 0..5 are architecturally meaningful; Oracle SPARC
     * Architecture 2015 notes that UltraSPARC Architecture 2005
     * implementations treat larger values as 5.  Clamping also keeps every
     * host shift count below in range for any guest-supplied value.
     */
    const unsigned n = cubesize > 5 ? 5 : (unsigned)cubesize;
    const uint64_t upper_mask = (1ULL << n) - 1;

    const uint64_t upper_z = (addr >> 60) & 0xf;
    const uint64_t upper_y = (addr >> 39) & upper_mask;
    /* Bits 17 .. 17 + n - 1: the low bit of the field is bit 17. */
    const uint64_t upper_x = (addr >> 17) & upper_mask;

    const uint64_t middle_z = (addr >> 56) & 0xf;
    const uint64_t middle_y = (addr >> 35) & 0xf;
    const uint64_t middle_x = (addr >> 13) & 0xf;

    const uint64_t lower_z = (addr >> 55) & 0x1;
    const uint64_t lower_y = (addr >> 33) & 0x3;
    const uint64_t lower_x = (addr >> 11) & 0x3;

    return (upper_z << (17 + 2 * n)) |
        (upper_y << (17 + n)) |
        (upper_x << 17) |
        (middle_z << 13) |
        (middle_y << 9) |
        (middle_x << 5) |
        (lower_z << 4) |
        (lower_y << 2) |
        lower_x;
}

/*
 * Element accessors for the VIS64 / VIS32 unions.
 *
 * Each macro expands to a member name plus subscript, so it is written
 * after a '.', e.g. "u.VIS_B64(3)".  The index n counts elements from the
 * least significant end of the union's integer member: on big-endian
 * hosts the array index is mirrored so that element 0 is the lowest-order
 * byte/halfword/word on either kind of host.
 *
 *   VIS_B64  - byte n of a 64-bit value            (0..7)
 *   VIS_W64  - unsigned 16-bit lane n of 64 bits   (0..3)
 *   VIS_SW64 - signed 16-bit lane n of 64 bits     (0..3)
 *   VIS_L64  - 32-bit lane n of 64 bits            (0..1)
 *   VIS_B32  - byte n of a 32-bit value            (0..3)
 *   VIS_W32  - 16-bit lane n of a 32-bit value     (0..1)
 */
#ifdef HOST_WORDS_BIGENDIAN
/* Big-endian host: array element 0 is the most significant, so mirror. */
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
/* Little-endian host: array order already matches significance order. */
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

/*
 * Overlay of one 64-bit value with its partitioned views.  All members
 * share the same storage; use the VIS_*64 accessor macros for the array
 * members so that lane numbering does not depend on host byte order.
 */
typedef union {
    uint8_t b[8];       /* eight bytes */
    uint16_t w[4];      /* four unsigned 16-bit lanes */
    int16_t sw[4];      /* the same four lanes, read as signed */
    uint32_t l[2];      /* two 32-bit lanes */
    uint64_t ll;        /* the whole value as one integer */
    float64 d;          /* the whole value as a float64 */
} VIS64;

/*
 * Overlay of one 32-bit value with its partitioned views.  All members
 * share the same storage; use the VIS_*32 accessor macros for the array
 * members so that lane numbering does not depend on host byte order.
 */
typedef union {
    uint8_t b[4];       /* four bytes */
    uint16_t w[2];      /* two 16-bit lanes */
    uint32_t l;         /* the whole value as one integer */
    float32 f;          /* the whole value as a float32 */
} VIS32;

/*
 * FPMERGE: interleave the four low-order bytes of src1 with the four
 * low-order bytes of src2.  Reading the result from its most significant
 * byte down: src1 byte 3, src2 byte 3, src1 byte 2, src2 byte 2, ...,
 * src1 byte 0, src2 byte 0.  The upper halves of both inputs are ignored.
 */
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    VIS64 first, merged;
    int k;

    first.ll = src1;
    merged.ll = src2;

    /*
     * The result is built in place on top of src2, so walk from the top
     * byte pair downwards: every src2 byte is read before the slot that
     * holds it gets overwritten.
     */
    for (k = 3; k >= 0; k--) {
        merged.VIS_B64(2 * k + 1) = first.VIS_B64(k);
        merged.VIS_B64(2 * k) = merged.VIS_B64(k);
    }

    return merged.ll;
}

/*
 * FMUL8x16: multiply each of the four low-order bytes of src1 (unsigned)
 * by the matching signed 16-bit lane of src2.  Each product is rounded to
 * nearest at bit 7 and bits 8..23 of it become the 16-bit result lane.
 */
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    VIS64 pixels, acc;
    int lane;

    pixels.ll = src1;
    acc.ll = src2;

    for (lane = 0; lane < 4; lane++) {
        uint32_t product = (int32_t)acc.VIS_SW64(lane) *
                           (int32_t)pixels.VIS_B64(lane);

        /* Round up when the discarded low byte is at least one half. */
        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }
        acc.VIS_W64(lane) = product >> 8;
    }

    return acc.ll;
}

/*
 * FMUL8x16AL: multiply each of the four low-order bytes of src1 (unsigned)
 * by one signed 16-bit scalar taken from bits 31:16 of src2.  Each product
 * is rounded to nearest at bit 7 and bits 8..23 of it become the matching
 * 16-bit lane of the result.
 *
 * NOTE(review): which half of the operand carries the scalar depends on
 * how the translator passes the register to this helper; the lane used
 * here is unchanged from the existing code - confirm against the caller.
 */
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    /*
     * Latch the scalar before producing any output.  The result lanes
     * must never feed back into the multiplier, so it cannot be re-read
     * from storage that is being overwritten lane by lane.
     */
    const int32_t scalar = (int16_t)(uint16_t)(src2 >> 16);
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        uint32_t product = scalar * pixel;

        /* Round up when the discarded low byte is at least one half. */
        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }
        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}

/*
 * FMUL8x16AU: multiply each of the four low-order bytes of src1 (unsigned)
 * by one signed 16-bit scalar taken from bits 15:0 of src2.  Each product
 * is rounded to nearest at bit 7 and bits 8..23 of it become the matching
 * 16-bit lane of the result.
 *
 * NOTE(review): which half of the operand carries the scalar depends on
 * how the translator passes the register to this helper; the lane used
 * here is unchanged from the existing code - confirm against the caller.
 */
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    /*
     * Latch the scalar before producing any output.  The result lanes
     * must never feed back into the multiplier, so it cannot be re-read
     * from storage that is being overwritten lane by lane.
     */
    const int32_t scalar = (int16_t)(uint16_t)src2;
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        uint32_t product = scalar * pixel;

        /* Round up when the discarded low byte is at least one half. */
        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }
        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}

/*
 * FMUL8SUx16: for each of the four 16-bit lanes, multiply the signed upper
 * byte of the src1 lane by the signed src2 lane.  Each product is rounded
 * to nearest at bit 7 and bits 8..23 of it become the 16-bit result lane.
 */
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 in, acc;
    int lane;

    in.ll = src1;
    acc.ll = src2;

    for (lane = 0; lane < 4; lane++) {
        /* Shifting the signed lane right by 8 leaves its signed top byte. */
        uint32_t product = (int32_t)acc.VIS_SW64(lane) *
                           ((int32_t)in.VIS_SW64(lane) >> 8);

        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }
        acc.VIS_W64(lane) = product >> 8;
    }

    return acc.ll;
}

/*
 * FMUL8ULx16: for each of the four 16-bit lanes, multiply the unsigned
 * lower byte of the src1 lane by the signed src2 lane.  Each product is
 * rounded at bit 7 and bits 8..23 of it become the 16-bit result lane.
 */
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 in, acc;
    int lane;

    in.ll = src1;
    acc.ll = src2;

    for (lane = 0; lane < 4; lane++) {
        /* Byte 2 * lane is the low-order byte of 16-bit lane 'lane'. */
        uint32_t product = (int32_t)acc.VIS_SW64(lane) *
                           ((uint32_t)in.VIS_B64(lane * 2));

        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }
        acc.VIS_W64(lane) = product >> 8;
    }

    return acc.ll;
}

/*
 * FMULD8SUx16: for each of the two low-order 16-bit lanes, multiply the
 * signed upper byte of the src1 lane by the signed src2 lane and store the
 * product, after the low-byte rounding step below, as a full 32-bit lane
 * of the result.
 *
 * NOTE(review): the product is rounded like the 16-bit variants and is
 * stored without any further shift - confirm against the instruction
 * definition.
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 in, acc;
    int lane;

    in.ll = src1;
    acc.ll = src2;

    /*
     * The 32-bit results overwrite the storage the 16-bit inputs live in,
     * so handle lane 1 first: writing its result leaves lane 0's input
     * intact.
     */
    for (lane = 1; lane >= 0; lane--) {
        uint32_t product = (int32_t)acc.VIS_SW64(lane) *
                           ((int32_t)in.VIS_SW64(lane) >> 8);

        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }
        acc.VIS_L64(lane) = product;
    }

    return acc.ll;
}

/*
 * FMULD8ULx16: for each of the two low-order 16-bit lanes, multiply the
 * unsigned lower byte of the src1 lane by the signed src2 lane and store
 * the product, after the low-byte rounding step below, as a full 32-bit
 * lane of the result.
 *
 * NOTE(review): the product is rounded like the 16-bit variants and is
 * stored without any further shift - confirm against the instruction
 * definition.
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 in, acc;
    int lane;

    in.ll = src1;
    acc.ll = src2;

    /*
     * The 32-bit results overwrite the storage the 16-bit inputs live in,
     * so handle lane 1 first: writing its result leaves lane 0's input
     * intact.
     */
    for (lane = 1; lane >= 0; lane--) {
        /* Byte 2 * lane is the low-order byte of 16-bit lane 'lane'. */
        uint32_t product = (int32_t)acc.VIS_SW64(lane) *
                           ((uint32_t)in.VIS_B64(lane * 2));

        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }
        acc.VIS_L64(lane) = product;
    }

    return acc.ll;
}

/*
 * FEXPAND: widen each of the four bytes in the low 32 bits of src1 into a
 * 16-bit lane of the result, shifted left by four bits.  Every result
 * lane is overwritten, so the value of src2 does not affect the output.
 */
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    VIS32 in;
    VIS64 out;
    int lane;

    in.l = (uint32_t)src1;
    out.ll = src2;

    for (lane = 0; lane < 4; lane++) {
        out.VIS_W64(lane) = in.VIS_B32(lane) << 4;
    }

    return out.ll;
}

#define VIS_HELPER(name, F)                             \
    uint64_t name##16(uint64_t src1, uint64_t src2)     \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.ll = src1;                                    \
        d.ll = src2;                                    \
                                                        \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                        \
        return d.ll;                                    \
    }                                                   \
                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
                                                        \
        return d.l;                                     \
    }                                                   \
                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)     \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.ll = src1;                                    \
        d.ll = src2;                                    \
                                                        \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                        \
        return d.ll;                                    \
    }                                                   \
                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.l = F(d.l, s.l);                              \
                                                        \
        return d.l;                                     \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

/*
 * Generate the partitioned compare helpers:
 *
 *   name##16 - compares the four 16-bit lanes of src1 and src2
 *   name##32 - compares the two 32-bit lanes of src1 and src2
 *
 * For every lane, F(src1 lane, src2 lane) is evaluated and bit 'lane' of
 * the result is set when it holds (lane 0 is the least significant lane).
 * All other result bits are zero.
 *
 * The lanes are the VIS fixed-point formats, which are signed, so they are
 * compared as int16_t / int32_t.  Comparing them as unsigned values would
 * order every negative lane above every positive one.
 */
#define VIS_CMPHELPER(name, F)                                          \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t result = 0;                                            \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 4; lane++) {                              \
            int16_t a = (int16_t)(uint16_t)(src1 >> (16 * lane));       \
            int16_t b = (int16_t)(uint16_t)(src2 >> (16 * lane));       \
                                                                        \
            if (F(a, b)) {                                              \
                result |= 1ULL << lane;                                 \
            }                                                           \
        }                                                               \
                                                                        \
        return result;                                                  \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t result = 0;                                            \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            int32_t a = (int32_t)(uint32_t)(src1 >> (32 * lane));       \
            int32_t b = (int32_t)(uint32_t)(src2 >> (32 * lane));       \
                                                                        \
            if (F(a, b)) {                                              \
                result |= 1ULL << lane;                                 \
            }                                                           \
        }                                                               \
                                                                        \
        return result;                                                  \
    }

/* Lane predicates plugged into VIS_CMPHELPER. */
#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

/*
 * PDIST: add to 'sum' the absolute differences between the eight
 * corresponding bytes of src1 and src2, and return the new total.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    /* Visit the bytes from the most significant one downwards. */
    for (shift = 56; shift >= 0; shift -= 8) {
        const int a = (src1 >> shift) & 0xff;
        const int b = (src2 >> shift) & 0xff;

        sum += (a >= b) ? (a - b) : (b - a);
    }

    return sum;
}

/*
 * FPACK16: convert the four signed 16-bit lanes of rs2 into four unsigned
 * bytes.  Each lane is scaled up by 2^scale, where scale is the 4-bit
 * field at bits 6:3 of gsr, has its low seven bits dropped, and is then
 * clamped to 0..255.  Byte n of the result comes from lane n.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0xf;
    uint32_t packed = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        const int16_t src = (int16_t)(uint16_t)(rs2 >> (16 * lane));
        /*
         * Scale by multiplying: left-shifting a negative signed value is
         * undefined in C.  |src| <= 2^15 and scale <= 15, so the product
         * always fits in 32 bits.
         */
        const int32_t scaled = src * (1 << scale);
        uint32_t pixel;

        if (scaled < 0) {
            pixel = 0;
        } else {
            pixel = (uint32_t)scaled >> 7;
            if (pixel > 255) {
                pixel = 255;
            }
        }

        packed |= pixel << (8 * lane);
    }

    return packed;
}

/*
 * FPACK32: shift rs1 left by one byte and insert, into the low byte of
 * each 32-bit half, a pixel derived from the matching signed 32-bit lane
 * of rs2.  Each lane is scaled up by 2^scale, where scale is the 5-bit
 * field at bits 7:3 of gsr, has its low 23 bits dropped, and is then
 * clamped to 0..255.
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    /* Make room: the low byte of each half is replaced below. */
    uint64_t packed = (rs1 << 8) & ~(0x000000ff000000ffULL);
    int word;

    for (word = 0; word < 2; word++) {
        const int32_t src = (int32_t)(uint32_t)(rs2 >> (32 * word));
        /*
         * Scale by multiplying: left-shifting a negative signed value is
         * undefined in C.  |src| <= 2^31 and scale <= 31, so the product
         * always fits in 64 bits.
         */
        const int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        uint64_t pixel;

        if (scaled < 0) {
            pixel = 0;
        } else {
            pixel = (uint64_t)scaled >> 23;
            if (pixel > 255) {
                pixel = 255;
            }
        }

        packed |= pixel << (32 * word);
    }

    return packed;
}

/*
 * FPACKFIX: convert the two signed 32-bit lanes of rs2 into two signed
 * 16-bit values.  Each lane is scaled up by 2^scale, where scale is the
 * 5-bit field at bits 7:3 of gsr, has its low 16 bits dropped, and is
 * then clamped to -32768..32767.  Lane n ends up in 16-bit half n of the
 * result.
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    uint32_t packed = 0;
    int word;

    for (word = 0; word < 2; word++) {
        const int32_t src = (int32_t)(uint32_t)(rs2 >> (32 * word));
        /*
         * Scale by multiplying: left-shifting a negative signed value is
         * undefined in C.  |src| <= 2^31 and scale <= 31, so the product
         * always fits in 64 bits.
         */
        const int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        int64_t fixed = scaled >> 16;

        if (fixed < -32768) {
            fixed = -32768;
        } else if (fixed > 32767) {
            fixed = 32767;
        }

        /* Keep only the 16-bit two's-complement pattern of the value. */
        packed |= ((uint32_t)fixed & 0xffff) << (16 * word);
    }

    return packed;
}

/*
 * BSHUFFLE: build a 64-bit value out of eight bytes picked from the
 * 16-byte concatenation src1:src2.
 *
 * Bytes are numbered from the most significant end on both sides: source
 * byte 0 is the top byte of src1, source byte 15 the bottom byte of src2,
 * and destination byte 0 is the top byte of the result.  The selector is
 * the upper 32 bits of gsr, read as eight 4-bit indices: the most
 * significant nibble picks the source byte for destination byte 0, the
 * least significant nibble for destination byte 7.  A selector of
 * 0x01234567 therefore returns src1 unchanged.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    const uint32_t mask = gsr >> 32;
    uint64_t result = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        const unsigned sel = (mask >> (28 - i * 4)) & 0xf;
        /* Indices 0..7 address src1, indices 8..15 address src2. */
        const uint64_t half = (sel & 8) ? src2 : src1;
        const uint64_t byte = (half >> (56 - 8 * (sel & 7))) & 0xff;

        /* Destination byte i, counted from the most significant end. */
        result |= byte << (56 - 8 * i);
    }

    return result;
}
Commit	Line	Data
1bccec25 BS	1	/*
	2	* VIS op helpers
	3	*
	4	* Copyright (c) 2003-2005 Fabrice Bellard
	5	*
	6	* This library is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Lesser General Public
	8	* License as published by the Free Software Foundation; either
	9	* version 2 of the License, or (at your option) any later version.
	10	*
	11	* This library is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	* Lesser General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Lesser General Public
	17	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
	18	*/
	19
db5ebe5f	20	#include "qemu/osdep.h"
1bccec25	21	#include "cpu.h"
2ef6175a	22	#include "exec/helper-proto.h"
1bccec25	23
1bccec25 BS	24	/* This function uses non-native bit order */
	25	#define GET_FIELD(X, FROM, TO) \
	26	((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
	27
	28	/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
	29	#define GET_FIELD_SP(X, FROM, TO) \
	30	GET_FIELD(X, 63 - (TO), 63 - (FROM))
	31
f027c3b1	32	target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
1bccec25 BS	33	{
	34	return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) \|
	35	(GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) \|
	36	(GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) \|
	37	(GET_FIELD_SP(pixel_addr, 56, 59) << 13) \|
	38	(GET_FIELD_SP(pixel_addr, 35, 38) << 9) \|
	39	(GET_FIELD_SP(pixel_addr, 13, 16) << 5) \|
	40	(((pixel_addr >> 55) & 1) << 4) \|
	41	(GET_FIELD_SP(pixel_addr, 33, 34) << 2) \|
	42	GET_FIELD_SP(pixel_addr, 11, 12);
	43	}
	44
1bccec25 BS	45	#ifdef HOST_WORDS_BIGENDIAN
	46	#define VIS_B64(n) b[7 - (n)]
	47	#define VIS_W64(n) w[3 - (n)]
	48	#define VIS_SW64(n) sw[3 - (n)]
	49	#define VIS_L64(n) l[1 - (n)]
	50	#define VIS_B32(n) b[3 - (n)]
	51	#define VIS_W32(n) w[1 - (n)]
	52	#else
	53	#define VIS_B64(n) b[n]
	54	#define VIS_W64(n) w[n]
	55	#define VIS_SW64(n) sw[n]
	56	#define VIS_L64(n) l[n]
	57	#define VIS_B32(n) b[n]
	58	#define VIS_W32(n) w[n]
	59	#endif
	60
	61	typedef union {
	62	uint8_t b[8];
	63	uint16_t w[4];
	64	int16_t sw[4];
	65	uint32_t l[2];
	66	uint64_t ll;
	67	float64 d;
	68	} VIS64;
	69
	70	typedef union {
	71	uint8_t b[4];
	72	uint16_t w[2];
	73	uint32_t l;
	74	float32 f;
	75	} VIS32;
	76
f027c3b1	77	uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
1bccec25 BS	78	{
	79	VIS64 s, d;
	80
03fb8cfc RH	81	s.ll = src1;
03fb8cfc RH	82	d.ll = src2;
1bccec25 BS	83
	84	/* Reverse calculation order to handle overlap */
	85	d.VIS_B64(7) = s.VIS_B64(3);
	86	d.VIS_B64(6) = d.VIS_B64(3);
	87	d.VIS_B64(5) = s.VIS_B64(2);
	88	d.VIS_B64(4) = d.VIS_B64(2);
	89	d.VIS_B64(3) = s.VIS_B64(1);
	90	d.VIS_B64(2) = d.VIS_B64(1);
	91	d.VIS_B64(1) = s.VIS_B64(0);
	92	/* d.VIS_B64(0) = d.VIS_B64(0); */
	93
03fb8cfc	94	return d.ll;
1bccec25 BS	95	}
1bccec25 BS	96
f027c3b1	97	uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
1bccec25 BS	98	{
	99	VIS64 s, d;
	100	uint32_t tmp;
	101
03fb8cfc RH	102	s.ll = src1;
03fb8cfc RH	103	d.ll = src2;
1bccec25 BS	104
	105	#define PMUL(r) \
	106	tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \
	107	if ((tmp & 0xff) > 0x7f) { \
	108	tmp += 0x100; \
	109	} \
	110	d.VIS_W64(r) = tmp >> 8;
	111
	112	PMUL(0);
	113	PMUL(1);
	114	PMUL(2);
	115	PMUL(3);
	116	#undef PMUL
	117
03fb8cfc	118	return d.ll;
1bccec25 BS	119	}
1bccec25 BS	120
f027c3b1	121	uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
1bccec25 BS	122	{
	123	VIS64 s, d;
	124	uint32_t tmp;
	125
03fb8cfc RH	126	s.ll = src1;
03fb8cfc RH	127	d.ll = src2;
1bccec25 BS	128
	129	#define PMUL(r) \
	130	tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \
	131	if ((tmp & 0xff) > 0x7f) { \
	132	tmp += 0x100; \
	133	} \
	134	d.VIS_W64(r) = tmp >> 8;
	135
	136	PMUL(0);
	137	PMUL(1);
	138	PMUL(2);
	139	PMUL(3);
	140	#undef PMUL
	141
03fb8cfc	142	return d.ll;
1bccec25 BS	143	}
1bccec25 BS	144
f027c3b1	145	uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
1bccec25 BS	146	{
	147	VIS64 s, d;
	148	uint32_t tmp;
	149
03fb8cfc RH	150	s.ll = src1;
03fb8cfc RH	151	d.ll = src2;
1bccec25 BS	152
	153	#define PMUL(r) \
	154	tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \
	155	if ((tmp & 0xff) > 0x7f) { \
	156	tmp += 0x100; \
	157	} \
	158	d.VIS_W64(r) = tmp >> 8;
	159
	160	PMUL(0);
	161	PMUL(1);
	162	PMUL(2);
	163	PMUL(3);
	164	#undef PMUL
	165
03fb8cfc	166	return d.ll;
1bccec25 BS	167	}
1bccec25 BS	168
f027c3b1	169	uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
1bccec25 BS	170	{
	171	VIS64 s, d;
	172	uint32_t tmp;
	173
03fb8cfc RH	174	s.ll = src1;
03fb8cfc RH	175	d.ll = src2;
1bccec25 BS	176
	177	#define PMUL(r) \
	178	tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
	179	if ((tmp & 0xff) > 0x7f) { \
	180	tmp += 0x100; \
	181	} \
	182	d.VIS_W64(r) = tmp >> 8;
	183
	184	PMUL(0);
	185	PMUL(1);
	186	PMUL(2);
	187	PMUL(3);
	188	#undef PMUL
	189
03fb8cfc	190	return d.ll;
1bccec25 BS	191	}
1bccec25 BS	192
f027c3b1	193	uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
1bccec25 BS	194	{
	195	VIS64 s, d;
	196	uint32_t tmp;
	197
03fb8cfc RH	198	s.ll = src1;
03fb8cfc RH	199	d.ll = src2;
1bccec25 BS	200
	201	#define PMUL(r) \
	202	tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
	203	if ((tmp & 0xff) > 0x7f) { \
	204	tmp += 0x100; \
	205	} \
	206	d.VIS_W64(r) = tmp >> 8;
	207
	208	PMUL(0);
	209	PMUL(1);
	210	PMUL(2);
	211	PMUL(3);
	212	#undef PMUL
	213
03fb8cfc	214	return d.ll;
1bccec25 BS	215	}
1bccec25 BS	216
f027c3b1	217	uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
1bccec25 BS	218	{
	219	VIS64 s, d;
	220	uint32_t tmp;
	221
03fb8cfc RH	222	s.ll = src1;
03fb8cfc RH	223	d.ll = src2;
1bccec25 BS	224
	225	#define PMUL(r) \
	226	tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
	227	if ((tmp & 0xff) > 0x7f) { \
	228	tmp += 0x100; \
	229	} \
	230	d.VIS_L64(r) = tmp;
	231
	232	/* Reverse calculation order to handle overlap */
	233	PMUL(1);
	234	PMUL(0);
	235	#undef PMUL
	236
03fb8cfc	237	return d.ll;
1bccec25 BS	238	}
1bccec25 BS	239
f027c3b1	240	uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
1bccec25 BS	241	{
	242	VIS64 s, d;
	243	uint32_t tmp;
	244
03fb8cfc RH	245	s.ll = src1;
03fb8cfc RH	246	d.ll = src2;
1bccec25 BS	247
	248	#define PMUL(r) \
	249	tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
	250	if ((tmp & 0xff) > 0x7f) { \
	251	tmp += 0x100; \
	252	} \
	253	d.VIS_L64(r) = tmp;
	254
	255	/* Reverse calculation order to handle overlap */
	256	PMUL(1);
	257	PMUL(0);
	258	#undef PMUL
	259
03fb8cfc	260	return d.ll;
1bccec25 BS	261	}
1bccec25 BS	262
f027c3b1	263	uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
1bccec25 BS	264	{
	265	VIS32 s;
	266	VIS64 d;
	267
03fb8cfc RH	268	s.l = (uint32_t)src1;
03fb8cfc RH	269	d.ll = src2;
1bccec25 BS	270	d.VIS_W64(0) = s.VIS_B32(0) << 4;
	271	d.VIS_W64(1) = s.VIS_B32(1) << 4;
	272	d.VIS_W64(2) = s.VIS_B32(2) << 4;
	273	d.VIS_W64(3) = s.VIS_B32(3) << 4;
	274
03fb8cfc	275	return d.ll;
1bccec25 BS	276	}
	277
	278	#define VIS_HELPER(name, F) \
f027c3b1	279	uint64_t name##16(uint64_t src1, uint64_t src2) \
1bccec25 BS	280	{ \
	281	VIS64 s, d; \
	282	\
03fb8cfc RH	283	s.ll = src1; \
03fb8cfc RH	284	d.ll = src2; \
1bccec25 BS	285	\
	286	d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0)); \
	287	d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1)); \
	288	d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2)); \
	289	d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3)); \
	290	\
03fb8cfc	291	return d.ll; \
1bccec25 BS	292	} \
1bccec25 BS	293	\
f027c3b1	294	uint32_t name##16s(uint32_t src1, uint32_t src2) \
1bccec25 BS	295	{ \
	296	VIS32 s, d; \
	297	\
	298	s.l = src1; \
	299	d.l = src2; \
	300	\
	301	d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0)); \
	302	d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1)); \
	303	\
	304	return d.l; \
	305	} \
	306	\
f027c3b1	307	uint64_t name##32(uint64_t src1, uint64_t src2) \
1bccec25 BS	308	{ \
	309	VIS64 s, d; \
	310	\
03fb8cfc RH	311	s.ll = src1; \
03fb8cfc RH	312	d.ll = src2; \
1bccec25 BS	313	\
	314	d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0)); \
	315	d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1)); \
	316	\
03fb8cfc	317	return d.ll; \
1bccec25 BS	318	} \
1bccec25 BS	319	\
f027c3b1	320	uint32_t name##32s(uint32_t src1, uint32_t src2) \
1bccec25 BS	321	{ \
	322	VIS32 s, d; \
	323	\
	324	s.l = src1; \
	325	d.l = src2; \
	326	\
	327	d.l = F(d.l, s.l); \
	328	\
	329	return d.l; \
	330	}
	331
	332	#define FADD(a, b) ((a) + (b))
	333	#define FSUB(a, b) ((a) - (b))
	334	VIS_HELPER(helper_fpadd, FADD)
	335	VIS_HELPER(helper_fpsub, FSUB)
	336
	337	#define VIS_CMPHELPER(name, F) \
f027c3b1	338	uint64_t name##16(uint64_t src1, uint64_t src2) \
1bccec25 BS	339	{ \
	340	VIS64 s, d; \
	341	\
03fb8cfc RH	342	s.ll = src1; \
03fb8cfc RH	343	d.ll = src2; \
1bccec25 BS	344	\
	345	d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \
	346	d.VIS_W64(0) \|= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \
	347	d.VIS_W64(0) \|= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \
	348	d.VIS_W64(0) \|= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \
	349	d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \
	350	\
	351	return d.ll; \
	352	} \
	353	\
f027c3b1	354	uint64_t name##32(uint64_t src1, uint64_t src2) \
1bccec25 BS	355	{ \
	356	VIS64 s, d; \
	357	\
03fb8cfc RH	358	s.ll = src1; \
03fb8cfc RH	359	d.ll = src2; \
1bccec25 BS	360	\
	361	d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \
	362	d.VIS_L64(0) \|= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \
	363	d.VIS_L64(1) = 0; \
	364	\
	365	return d.ll; \
	366	}
	367
	368	#define FCMPGT(a, b) ((a) > (b))
	369	#define FCMPEQ(a, b) ((a) == (b))
	370	#define FCMPLE(a, b) ((a) <= (b))
	371	#define FCMPNE(a, b) ((a) != (b))
	372
	373	VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
	374	VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
	375	VIS_CMPHELPER(helper_fcmple, FCMPLE)
	376	VIS_CMPHELPER(helper_fcmpne, FCMPNE)
f888300b RH	377
	378	uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
	379	{
	380	int i;
	381	for (i = 0; i < 8; i++) {
	382	int s1, s2;
	383
	384	s1 = (src1 >> (56 - (i * 8))) & 0xff;
	385	s2 = (src2 >> (56 - (i * 8))) & 0xff;
	386
	387	/* Absolute value of difference. */
	388	s1 -= s2;
	389	if (s1 < 0) {
	390	s1 = -s1;
	391	}
	392
	393	sum += s1;
	394	}
	395
	396	return sum;
	397	}
2dedf314 RH	398
	399	uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
	400	{
	401	int scale = (gsr >> 3) & 0xf;
	402	uint32_t ret = 0;
	403	int byte;
	404
	405	for (byte = 0; byte < 4; byte++) {
	406	uint32_t val;
	407	int16_t src = rs2 >> (byte * 16);
	408	int32_t scaled = src << scale;
	409	int32_t from_fixed = scaled >> 7;
	410
	411	val = (from_fixed < 0 ? 0 :
	412	from_fixed > 255 ? 255 : from_fixed);
	413
	414	ret \|= val << (8 * byte);
	415	}
	416
	417	return ret;
	418	}
	419
	420	uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
	421	{
	422	int scale = (gsr >> 3) & 0x1f;
	423	uint64_t ret = 0;
	424	int word;
	425
	426	ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
	427	for (word = 0; word < 2; word++) {
	428	uint64_t val;
	429	int32_t src = rs2 >> (word * 32);
	430	int64_t scaled = (int64_t)src << scale;
	431	int64_t from_fixed = scaled >> 23;
	432
	433	val = (from_fixed < 0 ? 0 :
	434	(from_fixed > 255) ? 255 : from_fixed);
	435
	436	ret \|= val << (32 * word);
	437	}
	438
	439	return ret;
	440	}
	441
	442	uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
	443	{
	444	int scale = (gsr >> 3) & 0x1f;
	445	uint32_t ret = 0;
	446	int word;
	447
	448	for (word = 0; word < 2; word++) {
	449	uint32_t val;
	450	int32_t src = rs2 >> (word * 32);
12a3567c	451	int64_t scaled = (int64_t)src << scale;
2dedf314 RH	452	int64_t from_fixed = scaled >> 16;
	453
	454	val = (from_fixed < -32768 ? -32768 :
	455	from_fixed > 32767 ? 32767 : from_fixed);
	456
	457	ret \|= (val & 0xffff) << (word * 16);
	458	}
	459
	460	return ret;
	461	}
793a137a	462
520c0d8d	463	uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
793a137a RH	464	{
	465	union {
	466	uint64_t ll[2];
	467	uint8_t b[16];
	468	} s;
	469	VIS64 r;
	470	uint32_t i, mask, host;
	471
	472	/* Set up S such that we can index across all of the bytes. */
	473	#ifdef HOST_WORDS_BIGENDIAN
	474	s.ll[0] = src1;
	475	s.ll[1] = src2;
	476	host = 0;
	477	#else
	478	s.ll[1] = src1;
	479	s.ll[0] = src2;
	480	host = 15;
	481	#endif
	482	mask = gsr >> 32;
	483
	484	for (i = 0; i < 8; ++i) {
	485	unsigned e = (mask >> (28 - i*4)) & 0xf;
	486	r.VIS_B64(i) = s.b[e ^ host];
	487	}
	488
	489	return r.ll;
	490	}