target/arm: Use clmul_16* routines

author Richard Henderson <richard.henderson@linaro.org>
Tue, 11 Jul 2023 08:26:24 +0000 (09:26 +0100)
committer Richard Henderson <richard.henderson@linaro.org>
Fri, 15 Sep 2023 13:57:00 +0000 (13:57 +0000)

diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c

index 96ddfb4b3a30d39653e352027e464d6c39b300c7..c666a96ba17d41e5c6d39faf6dd19e33ccf5f9cb 100644 (file)
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -985,14 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
   * Polynomial multiply. We can always do this generating 64 bits
   * of the result at a time, so we don't need to use DO_2OP_L.
   */
-#define VMULLPW_MASK 0x0000ffff0000ffffULL
-#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK)
-#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16)
-
  DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even)
  DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd)
-DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW)
-DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW)
+DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even)
+DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd)
  
  /*
   * Because the computation type is at least twice as large as required,
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c

index cd630ff905f31788a3b006fa27e99f052a19236a..5def86b5730c1f7bb82caad00825cfe492a32e88 100644 (file)
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2029,19 +2029,6 @@ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
      clear_tail(d, opr_sz, simd_maxsz(desc));
  }
  
-uint64_t pmull_w(uint64_t op1, uint64_t op2)
-{
-    uint64_t result = 0;
-    int i;
-    for (i = 0; i < 16; ++i) {
-        uint64_t mask = (op1 & 0x0000000100000001ull) * 0xffffffff;
-        result ^= op2 & mask;
-        op1 >>= 1;
-        op2 <<= 1;
-    }
-    return result;
-}
-
  void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
  {
      int hi = simd_data(desc);
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h

index c4afba6d9f12c6e98b39d632ca9e367699c48840..3ca1b94ccf989e151ca308337111b63480432033 100644 (file)
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -219,12 +219,6 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
  int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
  int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
  
-/*
- * 16 x 16 -> 32 vector polynomial multiply where the inputs are
- * in the low 16 bits of each 32-bit element
- */
-uint64_t pmull_w(uint64_t op1, uint64_t op2);
-
  /**
   * bfdotadd:
   * @sum: addend
author	Richard Henderson <richard.henderson@linaro.org>
	Tue, 11 Jul 2023 08:26:24 +0000 (09:26 +0100)
committer	Richard Henderson <richard.henderson@linaro.org>
	Fri, 15 Sep 2023 13:57:00 +0000 (13:57 +0000)
target/arm/tcg/mve_helper.c		patch | blob | blame | history
target/arm/tcg/vec_helper.c		patch | blob | blame | history
target/arm/tcg/vec_internal.h		patch | blob | blame | history