Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210913-3' into...

[mirror_qemu.git] / target / arm / helper-a64.c
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c

index 060699b901eb4a2dd5bfb3ed9a03e5386383f446..19445b3c94724967d32da7da6bf1937aea4a893b 100644 (file)
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -6,7 +6,7 @@
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
   *
   * This library is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -18,12 +18,13 @@
   */
  
  #include "qemu/osdep.h"
+#include "qemu/units.h"
  #include "cpu.h"
  #include "exec/gdbstub.h"
  #include "exec/helper-proto.h"
  #include "qemu/host-utils.h"
  #include "qemu/log.h"
-#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
  #include "qemu/bitops.h"
  #include "internals.h"
  #include "qemu/crc32c.h"
@@ -31,7 +32,7 @@
  #include "exec/cpu_ldst.h"
  #include "qemu/int128.h"
  #include "qemu/atomic128.h"
-#include "tcg.h"
+#include "tcg/tcg.h"
  #include "fpu/softfloat.h"
  #include <zlib.h> /* For crc32 */
  
@@ -70,7 +71,7 @@ static void daif_check(CPUARMState *env, uint32_t op,
                         uint32_t imm, uintptr_t ra)
  {
      /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set.  */
-    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
+    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
          raise_exception_ra(env, EXCP_UDEF,
                             syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                                 extract32(op, 3, 3), 4,
@@ -178,38 +179,6 @@ float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
      return float64_mul(a, b, fpst);
  }
  
-uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
-                          uint32_t rn, uint32_t numregs)
-{
-    /* Helper function for SIMD TBL and TBX. We have to do the table
-     * lookup part for the 64 bits worth of indices we're passed in.
-     * result is the initial results vector (either zeroes for TBL
-     * or some guest values for TBX), rn the register number where
-     * the table starts, and numregs the number of registers in the table.
-     * We return the results of the lookups.
-     */
-    int shift;
-
-    for (shift = 0; shift < 64; shift += 8) {
-        int index = extract64(indices, shift, 8);
-        if (index < 16 * numregs) {
-            /* Convert index (a byte offset into the virtual table
-             * which is a series of 128-bit vectors concatenated)
-             * into the correct register element plus a bit offset
-             * into that element, bearing in mind that the table
-             * can wrap around from V31 to V0.
-             */
-            int elt = (rn * 2 + (index >> 3)) % 64;
-            int bitidx = (index & 7) * 8;
-            uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
-            uint64_t val = extract64(q[elt & 1], bitidx, 8);
-
-            result = deposit64(result, shift, 8, val);
-        }
-    }
-    return result;
-}
-
  /* 64bit/double versions of the neon float compare functions */
  uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
  {
@@ -233,17 +202,6 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
   * versions, these do a fully fused multiply-add or
   * multiply-add-and-halve.
   */
-#define float16_two make_float16(0x4000)
-#define float16_three make_float16(0x4200)
-#define float16_one_point_five make_float16(0x3e00)
-
-#define float32_two make_float32(0x40000000)
-#define float32_three make_float32(0x40400000)
-#define float32_one_point_five make_float32(0x3fc00000)
-
-#define float64_two make_float64(0x4000000000000000ULL)
-#define float64_three make_float64(0x4008000000000000ULL)
-#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
  
  uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
  {
@@ -407,7 +365,9 @@ uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
          float16 nan = a;
          if (float16_is_signaling_nan(a, fpst)) {
              float_raise(float_flag_invalid, fpst);
-            nan = float16_silence_nan(a, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float16_silence_nan(a, fpst);
+            }
          }
          if (fpst->default_nan_mode) {
              nan = float16_default_nan(fpst);
@@ -438,7 +398,9 @@ float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
          float32 nan = a;
          if (float32_is_signaling_nan(a, fpst)) {
              float_raise(float_flag_invalid, fpst);
-            nan = float32_silence_nan(a, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float32_silence_nan(a, fpst);
+            }
          }
          if (fpst->default_nan_mode) {
              nan = float32_default_nan(fpst);
@@ -469,7 +431,9 @@ float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
          float64 nan = a;
          if (float64_is_signaling_nan(a, fpst)) {
              float_raise(float_flag_invalid, fpst);
-            nan = float64_silence_nan(a, fpst);
+            if (!fpst->default_nan_mode) {
+                nan = float64_silence_nan(a, fpst);
+            }
          }
          if (fpst->default_nan_mode) {
              nan = float64_default_nan(fpst);
@@ -552,7 +516,7 @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
  
  #ifdef CONFIG_USER_ONLY
      /* ??? Enforce alignment.  */
-    uint64_t *haddr = g2h(addr);
+    uint64_t *haddr = g2h(env_cpu(env), addr);
  
      set_helper_retaddr(ra);
      o0 = ldq_le_p(haddr + 0);
@@ -600,7 +564,7 @@ uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
  
      cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
      newv = int128_make128(new_lo, new_hi);
-    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
  
      success = int128_eq(oldv, cmpv);
      return !success;
@@ -622,7 +586,7 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
  
  #ifdef CONFIG_USER_ONLY
      /* ??? Enforce alignment.  */
-    uint64_t *haddr = g2h(addr);
+    uint64_t *haddr = g2h(env_cpu(env), addr);
  
      set_helper_retaddr(ra);
      o1 = ldq_be_p(haddr + 0);
@@ -674,7 +638,7 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
       */
      cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
      newv = int128_make128(new_hi, new_lo);
-    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
  
      success = int128_eq(oldv, cmpv);
      return !success;
@@ -696,7 +660,7 @@ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
  
      cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
      newv = int128_make128(new_lo, new_hi);
-    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
  
      env->xregs[rs] = int128_getlo(oldv);
      env->xregs[rs + 1] = int128_gethi(oldv);
@@ -717,7 +681,7 @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
  
      cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
      newv = int128_make128(new_lo, new_hi);
-    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
  
      env->xregs[rs + 1] = int128_getlo(oldv);
      env->xregs[rs] = int128_gethi(oldv);
@@ -955,6 +919,26 @@ static int el_from_spsr(uint32_t spsr)
      }
  }
  
+/* Write SPSR_ELx-format @val to the AArch32 CPSR: SS is copied to env->pstate,
+ * DIT moves to CPSR_DIT, and aarch32_cpsr_valid_mask() bounds the write. */
+static void cpsr_write_from_spsr_elx(CPUARMState *env, uint32_t val)
+{
+    uint32_t mask;
+
+    /* Copy SS from val into env->pstate; keep it out of the CPSR write. */
+    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
+    val &= ~PSTATE_SS;
+
+    /* Move DIT to the correct location for CPSR */
+    if (val & PSTATE_DIT) {
+        val &= ~PSTATE_DIT;
+        val |= CPSR_DIT;
+    }
+
+    mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar);
+    cpsr_write(env, val, mask, CPSRWriteRaw);
+}
+
  void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
  {
      int cur_el = arm_current_el(env);
@@ -982,8 +966,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
      if (new_el == -1) {
          goto illegal_return;
      }
-    if (new_el > cur_el
-        || (new_el == 2 && !arm_feature(env, ARM_FEATURE_EL2))) {
+    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
          /* Disallow return to an EL which is unimplemented or higher
           * than the current one.
           */
@@ -995,11 +978,6 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
          goto illegal_return;
      }
  
-    if (new_el == 2 && arm_is_secure_below_el3(env)) {
-        /* Return to the non-existent secure-EL2 */
-        goto illegal_return;
-    }
-
      if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
          goto illegal_return;
      }
@@ -1014,9 +992,9 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
           * will sort the register banks out for us, and we've already
           * caught all the bad-mode cases in el_from_spsr().
           */
-        cpsr_write(env, spsr, ~0, CPSRWriteRaw);
+        cpsr_write_from_spsr_elx(env, spsr);
          if (!arm_singlestep_active(env)) {
-            env->uncached_cpsr &= ~PSTATE_SS;
+            env->pstate &= ~PSTATE_SS;
          }
          aarch64_sync_64_to_32(env);
  
@@ -1025,21 +1003,46 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
          } else {
              env->regs[15] = new_pc & ~0x3;
          }
+        helper_rebuild_hflags_a32(env, new_el);
          qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                        "AArch32 EL%d PC 0x%" PRIx32 "\n",
                        cur_el, new_el, env->regs[15]);
      } else {
+        int tbii;
+
          env->aarch64 = 1;
+        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
          pstate_write(env, spsr);
          if (!arm_singlestep_active(env)) {
              env->pstate &= ~PSTATE_SS;
          }
          aarch64_restore_sp(env, new_el);
+        helper_rebuild_hflags_a64(env, new_el);
+
+        /*
+         * Apply TBI to the exception return address.  We had to delay this
+         * until after we selected the new EL, so that we could select the
+         * correct TBI+TBID bits.  This is made easier by waiting until after
+         * the hflags rebuild, since we can pull the composite TBII field
+         * from there.
+         */
+        tbii = EX_TBFLAG_A64(env->hflags, TBII);
+        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
+            /* TBI is enabled. */
+            int core_mmu_idx = cpu_mmu_index(env, false);
+            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
+                new_pc = sextract64(new_pc, 0, 56);
+            } else {
+                new_pc = extract64(new_pc, 0, 56);
+            }
+        }
          env->pc = new_pc;
+
          qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                        "AArch64 EL%d PC 0x%" PRIx64 "\n",
                        cur_el, new_el, env->pc);
      }
+
      /*
       * Note that cur_el can never be 0.  If new_el is 0, then
       * el0_a64 is return_to_aa64, else el0_a64 is ignored.
@@ -1068,6 +1071,7 @@ illegal_return:
      if (!arm_singlestep_active(env)) {
          env->pstate &= ~PSTATE_SS;
      }
+    helper_rebuild_hflags_a64(env, cur_el);
      qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                    "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
  }
@@ -1083,4 +1087,50 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
      return float16_sqrt(a, s);
  }
  
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /*
+     * Implement DC ZVA: zero the blocklen-byte block that contains vaddr_in,
+     * where blocklen is 4 << dcz_blocksize read from env_archcpu(env).
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory (QEMU
+     * usually implements neither alignment faults nor memory attributes).
+     */
+    int blocklen = 4 << env_archcpu(env)->dcz_blocksize; /* size in bytes */
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1); /* vaddr_in rounded down */
+    int mmu_idx = cpu_mmu_index(env, false);
+    void *mem;
+
+    /*
+     * Trapless lookup.  In addition to an actual invalid page, this may
+     * return NULL for I/O, watchpoints, clean pages, etc.
+     */
+    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
+
+#ifndef CONFIG_USER_ONLY
+    if (unlikely(!mem)) {
+        uintptr_t ra = GETPC();
+
+        /*
+         * Trap if accessing an invalid page.  DC_ZVA requires that we supply
+         * the original address (vaddr_in) for an invalid page; watchpoints
+         * require that we probe the whole block.  So do both, in that order.
+         */
+        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
+        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
+
+        if (unlikely(!mem)) {
+            /*
+             * The only remaining reason for mem == NULL is I/O.
+             * Just do a series of byte writes as the architecture demands.
+             */
+            for (int i = 0; i < blocklen; i++) {
+                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
+            }
+            return;
+        }
+    }
+#endif
  
+    memset(mem, 0, blocklen); /* user-only: mem is unchecked; TODO confirm */
+}