* _ap absolute set r0 = memw(r1=##variable)
* _pr post increment register r0 = memw(r1++m1)
* _pi post increment immediate r0 = memb(r1++#1)
+ * _pci post increment circular immediate r0 = memw(r1++#4:circ(m0))
+ * _pcr post increment circular register r0 = memw(r1++I:circ(m0))
*/
/* Macros for complex addressing modes */
fEA_REG(RxV); \
fPM_I(RxV, siV); \
} while (0)
-
+#define GET_EA_pci \
+ do { \
+ TCGv tcgv_siV = tcg_const_tl(siV); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_helper_fcircadd(RxV, RxV, tcgv_siV, MuV, \
+ hex_gpr[HEX_REG_CS0 + MuN]); \
+ tcg_temp_free(tcgv_siV); \
+ } while (0)
+#define GET_EA_pcr(SHIFT) \
+ do { \
+ TCGv ireg = tcg_temp_new(); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_read_ireg(ireg, MuV, (SHIFT)); \
+ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
+ tcg_temp_free(ireg); \
+ } while (0)
/* Instructions with multiple definitions */
#define fGEN_TCG_LOAD_AP(RES, SIZE, SIGN) \
#define fGEN_TCG_L4_loadrd_ap(SHORTCODE) \
fGEN_TCG_LOAD_AP(RddV, 8, u)
+#define fGEN_TCG_L2_loadrub_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrb_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadruh_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrh_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadri_pci(SHORTCODE) SHORTCODE
+#define fGEN_TCG_L2_loadrd_pci(SHORTCODE) SHORTCODE
+
+#define fGEN_TCG_LOAD_pcr(SHIFT, LOAD) \
+ do { \
+ TCGv ireg = tcg_temp_new(); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_read_ireg(ireg, MuV, SHIFT); \
+ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
+ LOAD; \
+ tcg_temp_free(ireg); \
+ } while (0)
+
+#define fGEN_TCG_L2_loadrub_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, u, EA, RdV))
+#define fGEN_TCG_L2_loadrb_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, s, EA, RdV))
+#define fGEN_TCG_L2_loadruh_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, u, EA, RdV))
+#define fGEN_TCG_L2_loadrh_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, s, EA, RdV))
+#define fGEN_TCG_L2_loadri_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(2, fLOAD(1, 4, u, EA, RdV))
+#define fGEN_TCG_L2_loadrd_pcr(SHORTCODE) \
+ fGEN_TCG_LOAD_pcr(3, fLOAD(1, 8, u, EA, RddV))
+
#define fGEN_TCG_L2_loadrub_pr(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrub_pi(SHORTCODE) SHORTCODE
#define fGEN_TCG_L2_loadrb_pr(SHORTCODE) SHORTCODE
#define fGEN_TCG_S4_stored_locked(SHORTCODE) \
do { SHORTCODE; READ_PREG(PdV, PdN); } while (0)
+#define fGEN_TCG_STORE(SHORTCODE) \
+ do { \
+ TCGv HALF = tcg_temp_new(); \
+ TCGv BYTE = tcg_temp_new(); \
+ SHORTCODE; \
+ tcg_temp_free(HALF); \
+ tcg_temp_free(BYTE); \
+ } while (0)
+
+#define fGEN_TCG_STORE_pcr(SHIFT, STORE) \
+ do { \
+ TCGv ireg = tcg_temp_new(); \
+ TCGv HALF = tcg_temp_new(); \
+ TCGv BYTE = tcg_temp_new(); \
+ tcg_gen_mov_tl(EA, RxV); \
+ gen_read_ireg(ireg, MuV, SHIFT); \
+ gen_helper_fcircadd(RxV, RxV, ireg, MuV, hex_gpr[HEX_REG_CS0 + MuN]); \
+ STORE; \
+ tcg_temp_free(ireg); \
+ tcg_temp_free(HALF); \
+ tcg_temp_free(BYTE); \
+ } while (0)
+
+#define fGEN_TCG_S2_storerb_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerb_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, RtV)))
+
+#define fGEN_TCG_S2_storerh_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerh_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, RtV)))
+
+#define fGEN_TCG_S2_storerf_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerf_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(1, RtV)))
+
+#define fGEN_TCG_S2_storeri_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storeri_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, RtV))
+
+#define fGEN_TCG_S2_storerd_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerd_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(3, fSTORE(1, 8, EA, RttV))
+
+#define fGEN_TCG_S2_storerbnew_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerbnew_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, NtN)))
+
+#define fGEN_TCG_S2_storerhnew_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerhnew_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, NtN)))
+
+#define fGEN_TCG_S2_storerinew_pci(SHORTCODE) \
+ fGEN_TCG_STORE(SHORTCODE)
+#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \
+ fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
+
/*
* Mathematical operations with more than one definition require
* special handling
CHECK_NOSHUF; \
tcg_gen_qemu_ld64(DST, VA, ctx->mem_idx); \
} while (0)
+
+#define MEM_STORE1_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store1i, \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_store1, (void)0))
+#define MEM_STORE1(VA, DATA, SLOT) \
+ MEM_STORE1_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+
+#define MEM_STORE2_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store2i, \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_store2, (void)0))
+#define MEM_STORE2(VA, DATA, SLOT) \
+ MEM_STORE2_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+
+#define MEM_STORE4_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store4i, \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_store4, (void)0))
+#define MEM_STORE4(VA, DATA, SLOT) \
+ MEM_STORE4_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
+
+#define MEM_STORE8_FUNC(X) \
+ __builtin_choose_expr(TYPE_INT(X), \
+ gen_store8i, \
+ __builtin_choose_expr(TYPE_TCGV_I64(X), \
+ gen_store8, (void)0))
+#define MEM_STORE8(VA, DATA, SLOT) \
+ MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, ctx, SLOT)
#else
#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA))
#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA))
#define fPCALIGN(IMM) IMM = (IMM & ~PCALIGN_MASK)
+#ifdef QEMU_GENERATE
+static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
+{
+ /*
+ * Section 2.2.4 of the Hexagon V67 Programmer's Reference Manual
+ *
+ * The "I" value from a modifier register is divided into two pieces
+ * LSB bits 23:17
+ * MSB bits 31:28
+ * The value is signed
+ *
+ * At the end we shift the result according to the shift argument
+ */
+ TCGv msb = tcg_temp_new();
+ TCGv lsb = tcg_temp_new();
+
+ tcg_gen_extract_tl(lsb, val, 17, 7);
+ tcg_gen_sari_tl(msb, val, 21);
+ tcg_gen_deposit_tl(result, msb, lsb, 0, 7);
+
+ tcg_gen_shli_tl(result, result, shift);
+
+ tcg_temp_free(msb);
+ tcg_temp_free(lsb);
+
+ return result;
+}
+#define fREAD_IREG(VAL, SHIFT) gen_read_ireg(ireg, (VAL), (SHIFT))
+#else
+#define fREAD_IREG(VAL) \
+ (fSXTN(11, 64, (((VAL) & 0xf0000000) >> 21) | ((VAL >> 17) & 0x7f)))
+#endif
+
#define fREAD_LR() (READ_REG(HEX_REG_LR))
#define fWRITE_LR(A) WRITE_RREG(HEX_REG_LR, A)
#define fEA_REG(REG) tcg_gen_mov_tl(EA, REG)
#define fPM_I(REG, IMM) tcg_gen_addi_tl(REG, REG, IMM)
#define fPM_M(REG, MVAL) tcg_gen_add_tl(REG, REG, MVAL)
+#define fPM_CIRI(REG, IMM, MVAL) \
+ do { \
+ TCGv tcgv_siV = tcg_const_tl(siV); \
+ gen_helper_fcircadd(REG, REG, tcgv_siV, MuV, \
+ hex_gpr[HEX_REG_CS0 + MuN]); \
+ tcg_temp_free(tcgv_siV); \
+ } while (0)
#else
#define fEA_IMM(IMM) do { EA = (IMM); } while (0)
#define fEA_REG(REG) do { EA = (REG); } while (0)
gen_load_locked##SIZE##SIGN(DST, EA, ctx->mem_idx);
#endif
+#ifdef QEMU_GENERATE
+#define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, insn->slot)
+#else
#define fSTORE(NUM, SIZE, EA, SRC) MEM_STORE##SIZE(EA, SRC, slot)
+#endif
#ifdef QEMU_GENERATE
#define fSTORE_LOCKED(NUM, SIZE, EA, SRC, PRED) \
gen_store_conditional##SIZE(env, ctx, PdN, PRED, EA, SRC);
#endif
+#ifdef QEMU_GENERATE
+#define GETBYTE_FUNC(X) \
+ __builtin_choose_expr(TYPE_TCGV(X), \
+ gen_get_byte, \
+ __builtin_choose_expr(TYPE_TCGV_I64(X), \
+ gen_get_byte_i64, (void)0))
+#define fGETBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, true)
+#define fGETUBYTE(N, SRC) GETBYTE_FUNC(SRC)(BYTE, N, SRC, false)
+#else
#define fGETBYTE(N, SRC) ((int8_t)((SRC >> ((N) * 8)) & 0xff))
#define fGETUBYTE(N, SRC) ((uint8_t)((SRC >> ((N) * 8)) & 0xff))
+#endif
#define fSETBYTE(N, DST, VAL) \
do { \
DST = (DST & ~(0x0ffLL << ((N) * 8))) | \
(((uint64_t)((VAL) & 0x0ffLL)) << ((N) * 8)); \
} while (0)
+
+#ifdef QEMU_GENERATE
+#define fGETHALF(N, SRC) gen_get_half(HALF, N, SRC, true)
+#define fGETUHALF(N, SRC) gen_get_half(HALF, N, SRC, false)
+#else
#define fGETHALF(N, SRC) ((int16_t)((SRC >> ((N) * 16)) & 0xffff))
#define fGETUHALF(N, SRC) ((uint16_t)((SRC >> ((N) * 16)) & 0xffff))
+#endif
#define fSETHALF(N, DST, VAL) \
do { \
DST = (DST & ~(0x0ffffLL << ((N) * 16))) | \
--- /dev/null
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+
+#define DEBUG 0
+#define DEBUG_PRINTF(...) \
+ do { \
+ if (DEBUG) { \
+ printf(__VA_ARGS__); \
+ } \
+ } while (0)
+
+
+#define NBYTES (1 << 8)
+#define NHALFS (NBYTES / sizeof(short))
+#define NWORDS (NBYTES / sizeof(int))
+#define NDOBLS (NBYTES / sizeof(long long))
+
+long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0};
+int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0};
+short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0};
+unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0};
+
+/*
+ * We use the C preporcessor to deal with the combinations of types
+ */
+
+#define INIT(BUF, N) \
+ void init_##BUF(void) \
+ { \
+ int i; \
+ for (i = 0; i < N; i++) { \
+ BUF[i] = i; \
+ } \
+ } \
+
+INIT(bbuf, NBYTES)
+INIT(hbuf, NHALFS)
+INIT(wbuf, NWORDS)
+INIT(dbuf, NDOBLS)
+
+/*
+ * Macros for performing circular load
+ * RES result
+ * ADDR address
+ * START start address of buffer
+ * LEN length of buffer (in bytes)
+ * INC address increment (in bytes for IMM, elements for REG)
+ */
+#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %2\n\t" \
+ "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
+ : "=r"(RES), "+r"(ADDR) \
+ : "r"(START), "r"(LEN) \
+ : "r4", "m0", "cs0")
+#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)
+
+/*
+ * The mreg has the following pieces
+ * mreg[31:28] increment[10:7]
+ * mreg[27:24] K value (used Hexagon v3 and earlier)
+ * mreg[23:17] increment[6:0]
+ * mreg[16:0] circular buffer length
+ */
+static int build_mreg(int inc, int K, int len)
+{
+ return ((inc & 0x780) << 21) |
+ ((K & 0xf) << 24) |
+ ((inc & 0x7f) << 17) |
+ (len & 0x1ffff);
+}
+
+#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %2\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %3\n\t" \
+ "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
+ : "=r"(RES), "+r"(ADDR) \
+ : "r"(build_mreg((INC), 0, (LEN))), \
+ "r"(START) \
+ : "r4", "m1", "cs1")
+#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
+#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
+ CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)
+
+/*
+ * Macros for performing circular store
+ * VAL value to store
+ * ADDR address
+ * START start address of buffer
+ * LEN length of buffer (in bytes)
+ * INC address increment (in bytes for IMM, elements for REG)
+ */
+#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %1\n\t" \
+ "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
+ : "+r"(ADDR) \
+ : "r"(START), "r"(VAL), "r"(LEN) \
+ : "r4", "m0", "cs0", "memory")
+#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)
+
+#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %3\n\t" \
+ "m0 = r4\n\t" \
+ "cs0 = %1\n\t" \
+ "{\n\t" \
+ " r5 = %2\n\t" \
+ " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
+ "}\n\t" \
+ : "+r"(ADDR) \
+ : "r"(START), "r"(VAL), "r"(LEN) \
+ : "r4", "r5", "m0", "cs0", "memory")
+#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)
+
+#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %1\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %2\n\t" \
+ "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
+ : "+r"(ADDR) \
+ : "r"(build_mreg((INC), 0, (LEN))), \
+ "r"(START), \
+ "r"(VAL) \
+ : "r4", "m1", "cs1", "memory")
+#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)
+
+#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
+ __asm__( \
+ "r4 = %1\n\t" \
+ "m1 = r4\n\t" \
+ "cs1 = %2\n\t" \
+ "{\n\t" \
+ " r5 = %3\n\t" \
+ " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
+ "}\n\t" \
+ : "+r"(ADDR) \
+ : "r"(build_mreg((INC), 0, (LEN))), \
+ "r"(START), \
+ "r"(VAL) \
+ : "r4", "r5", "m1", "cs1", "memory")
+#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
+#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
+ CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)
+
+
+int err;
+
+/* We'll test increments +1 and -1 */
+void check_load(int i, long long result, int inc, int size)
+{
+ int expect = (i * inc);
+ while (expect >= size) {
+ expect -= size;
+ }
+ while (expect < 0) {
+ expect += size;
+ }
+ if (result != expect) {
+ printf("ERROR(%d): %lld != %d\n", i, result, expect);
+ err++;
+ }
+}
+
+#define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
+void circ_test_load_imm_##SZ(void) \
+{ \
+ TYPE *p = (TYPE *)BUF; \
+ int size = 10; \
+ int i; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
+ } \
+ p = (TYPE *)BUF; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
+ } \
+}
+
+TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d)
+TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d)
+TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d)
+TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d)
+TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, d)
+TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld)
+
+#define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
+void circ_test_load_reg_##SZ(void) \
+{ \
+ TYPE *p = (TYPE *)BUF; \
+ int size = 13; \
+ int i; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, 1, size); \
+ } \
+ p = (TYPE *)BUF; \
+ for (i = 0; i < BUFSIZE; i++) { \
+ TYPE element; \
+ CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
+ i, p, element); \
+ check_load(i, element, -1, size); \
+ } \
+}
+
+TEST_LOAD_REG(b, char, bbuf, NBYTES, d)
+TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d)
+TEST_LOAD_REG(h, short, hbuf, NHALFS, d)
+TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d)
+TEST_LOAD_REG(w, int, wbuf, NWORDS, d)
+TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld)
+
+/* The circular stores will wrap around somewhere inside the buffer */
+#define CIRC_VAL(SZ, TYPE, BUFSIZE) \
+TYPE circ_val_##SZ(int i, int inc, int size) \
+{ \
+ int mod = BUFSIZE % size; \
+ int elem = i * inc; \
+ if (elem < 0) { \
+ if (-elem <= size - mod) { \
+ return (elem + BUFSIZE - mod); \
+ } else { \
+ return (elem + BUFSIZE + size - mod); \
+ } \
+ } else if (elem < mod) {\
+ return (elem + BUFSIZE - mod); \
+ } else { \
+ return (elem + BUFSIZE - size - mod); \
+ } \
+}
+
+CIRC_VAL(b, unsigned char, NBYTES)
+CIRC_VAL(h, short, NHALFS)
+CIRC_VAL(w, int, NWORDS)
+CIRC_VAL(d, long long, NDOBLS)
+
+/*
+ * Circular stores should only write to the first "size" elements of the buffer
+ * the remainder of the elements should have BUF[i] == i
+ */
+#define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
+void check_store_##SZ(int inc, int size) \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) { \
+ DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
+ i, BUF[i], circ_val_##SZ(i, inc, size)); \
+ if (BUF[i] != circ_val_##SZ(i, inc, size)) { \
+ printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \
+ i, BUF[i], circ_val_##SZ(i, inc, size)); \
+ err++; \
+ } \
+ } \
+ for (i = size; i < BUFSIZE; i++) { \
+ if (BUF[i] != i) { \
+ printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \
+ err++; \
+ } \
+ } \
+}
+
+CHECK_STORE(b, bbuf, NBYTES, x)
+CHECK_STORE(h, hbuf, NHALFS, x)
+CHECK_STORE(w, wbuf, NWORDS, x)
+CHECK_STORE(d, dbuf, NDOBLS, llx)
+
+#define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
+void circ_test_store_imm_##SZ(void) \
+{ \
+ unsigned int size = 27; \
+ TYPE *p = BUF; \
+ TYPE val = 0; \
+ int i; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
+ val++; \
+ } \
+ check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
+ p = BUF; \
+ val = 0; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
+ -(INC)); \
+ val++; \
+ } \
+ check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
+}
+
+CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1)
+CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2)
+CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2)
+CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4)
+CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8)
+CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1)
+CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2)
+CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4)
+
+#define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
+void circ_test_store_reg_##SZ(void) \
+{ \
+ TYPE *p = BUF; \
+ unsigned int size = 19; \
+ TYPE val = 0; \
+ int i; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
+ val++; \
+ } \
+ check_store_##CHK(1, size); \
+ p = BUF; \
+ val = 0; \
+ init_##BUF(); \
+ for (i = 0; i < BUFSIZE; i++) { \
+ CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
+ val++; \
+ } \
+ check_store_##CHK(-1, size); \
+}
+
+CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0)
+CIRC_TEST_STORE_REG(h, h, short, hbuf, NHALFS, 0)
+CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16)
+CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0)
+CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0)
+CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0)
+CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0)
+CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0)
+
+/* Test the old scheme used in Hexagon V3 */
+static void circ_test_v3(void)
+{
+ int *p = wbuf;
+ int size = 15;
+ int K = 4; /* 64 bytes */
+ int element;
+ int i;
+
+ init_wbuf();
+
+ for (i = 0; i < NWORDS; i++) {
+ __asm__(
+ "r4 = %2\n\t"
+ "m1 = r4\n\t"
+ "%0 = memw(%1++I:circ(M1))\n\t"
+ : "=r"(element), "+r"(p)
+ : "r"(build_mreg(1, K, size * sizeof(int)))
+ : "r4", "m1");
+ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
+ check_load(i, element, 1, size);
+ }
+}
+
+int main()
+{
+ init_bbuf();
+ init_hbuf();
+ init_wbuf();
+ init_dbuf();
+
+ DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
+ DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
+ DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
+ DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
+ DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);
+
+ circ_test_load_imm_b();
+ circ_test_load_imm_ub();
+ circ_test_load_imm_h();
+ circ_test_load_imm_uh();
+ circ_test_load_imm_w();
+ circ_test_load_imm_d();
+
+ circ_test_load_reg_b();
+ circ_test_load_reg_ub();
+ circ_test_load_reg_h();
+ circ_test_load_reg_uh();
+ circ_test_load_reg_w();
+ circ_test_load_reg_d();
+
+ circ_test_store_imm_b();
+ circ_test_store_imm_h();
+ circ_test_store_imm_f();
+ circ_test_store_imm_w();
+ circ_test_store_imm_d();
+ circ_test_store_imm_bnew();
+ circ_test_store_imm_hnew();
+ circ_test_store_imm_wnew();
+
+ circ_test_store_reg_b();
+ circ_test_store_reg_h();
+ circ_test_store_reg_f();
+ circ_test_store_reg_w();
+ circ_test_store_reg_d();
+ circ_test_store_reg_bnew();
+ circ_test_store_reg_hnew();
+ circ_test_store_reg_wnew();
+
+ circ_test_v3();
+
+ puts(err ? "FAIL" : "PASS");
+ return err ? 1 : 0;
+}