#include "exec/hwaddr.h"
#endif
#include "exec/memattrs.h"
+#include "qom/cpu.h"
+
+#include "cpu-param.h"
#ifndef TARGET_LONG_BITS
-#error TARGET_LONG_BITS must be defined before including this header
+# error TARGET_LONG_BITS must be defined in cpu-param.h
+#endif
+#ifndef NB_MMU_MODES
+# error NB_MMU_MODES must be defined in cpu-param.h
+#endif
+#ifndef TARGET_PHYS_ADDR_SPACE_BITS
+# error TARGET_PHYS_ADDR_SPACE_BITS must be defined in cpu-param.h
+#endif
+#ifndef TARGET_VIRT_ADDR_SPACE_BITS
+# error TARGET_VIRT_ADDR_SPACE_BITS must be defined in cpu-param.h
+#endif
+#ifndef TARGET_PAGE_BITS
+# ifdef TARGET_PAGE_BITS_VARY
+#  ifndef TARGET_PAGE_BITS_MIN
+#   error TARGET_PAGE_BITS_MIN must be defined in cpu-param.h
+#  endif
+# else
+#  error TARGET_PAGE_BITS must be defined in cpu-param.h
+# endif
#endif
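
The checks above mean every target must now ship a cpu-param.h providing these constants. As a point of reference, here is a minimal sketch of such a header for a hypothetical 64-bit target with 4 KiB pages; the values are illustrative, not taken from any real target:

    /* cpu-param.h for a hypothetical example target */
    #ifndef EXAMPLE_CPU_PARAM_H
    #define EXAMPLE_CPU_PARAM_H

    #define TARGET_LONG_BITS            64
    #define TARGET_PAGE_BITS            12  /* 4 KiB pages */
    #define TARGET_PHYS_ADDR_SPACE_BITS 48
    #define TARGET_VIRT_ADDR_SPACE_BITS 48
    #define NB_MMU_MODES                4

    #endif
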
#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
#endif
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
+
/* use a fully associative victim tlb of 8 entries */
#define CPU_VTLB_SIZE 8
#define CPU_TLB_ENTRY_BITS 5
#endif
-/* TCG_TARGET_TLB_DISPLACEMENT_BITS is used in CPU_TLB_BITS to ensure that
- * the TLB is not unnecessarily small, but still small enough for the
- * TLB lookup instruction sequence used by the TCG target.
- *
- * TCG will have to generate an operand as large as the distance between
- * env and the tlb_table[NB_MMU_MODES - 1][0].addend. For simplicity,
- * the TCG targets just round everything up to the next power of two, and
- * count bits. This works because: 1) the size of each TLB is a largish
- * power of two, 2) and because the limit of the displacement is really close
- * to a power of two, 3) the offset of tlb_table[0][0] inside env is smaller
- * than the size of a TLB.
- *
- * For example, the maximum displacement 0xFFF0 on PPC and MIPS, but TCG
- * just says "the displacement is 16 bits". TCG_TARGET_TLB_DISPLACEMENT_BITS
- * then ensures that tlb_table at least 0x8000 bytes large ("not unnecessarily
- * small": 2^15). The operand then will come up smaller than 0xFFF0 without
- * any particular care, because the TLB for a single MMU mode is larger than
- * 0x10000-0xFFF0=16 bytes. In the end, the maximum value of the operand
- * could be something like 0xC000 (the offset of the last TLB table) plus
- * 0x18 (the offset of the addend field in each TLB entry) plus the offset
- * of tlb_table inside env (which is non-trivial but not huge).
- */
-#define CPU_TLB_BITS \
- MIN(8, \
- TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
- (NB_MMU_MODES <= 1 ? 0 : \
- NB_MMU_MODES <= 2 ? 1 : \
- NB_MMU_MODES <= 4 ? 2 : \
- NB_MMU_MODES <= 8 ? 3 : 4))
-#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+#define CPU_TLB_DYN_MIN_BITS 6
+#define CPU_TLB_DYN_DEFAULT_BITS 8
+# if HOST_LONG_BITS == 32
+/* Make sure we do not require a double-word shift for the TLB load */
+#  define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS)
+# else /* HOST_LONG_BITS == 64 */
+/*
+ * Assuming TARGET_PAGE_BITS==12, with 2**22 entries we can cover 2**(22+12) ==
+ * 2**34 == 16G of address space. This is roughly what one would expect a
+ * TLB to cover in a modern (as of 2018) x86_64 CPU. For instance, Intel
+ * Skylake's Level-2 STLB has 16 1G entries.
+ * Also, make sure we do not size the TLB past the guest's address space.
+ */
+#  define CPU_TLB_DYN_MAX_BITS \
+ MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
+# endif
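
To make the 64-bit bound concrete, here is a small runnable sketch of the arithmetic; the guest parameters are assumed for illustration:

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int page_bits = 12;  /* assumed TARGET_PAGE_BITS */
        int virt64 = 48;     /* assumed 64-bit guest virtual address bits */
        int virt32 = 32;     /* assumed 32-bit guest virtual address bits */

        /* MIN(22, 48 - 12) == 22: up to 2^22 entries, covering
         * 2^(22+12) == 16G of guest virtual address space. */
        printf("64-bit guest: %d bits\n", MIN(22, virt64 - page_bits));

        /* MIN(22, 32 - 12) == 20: the TLB is never sized beyond
         * the guest's entire address space. */
        printf("32-bit guest: %d bits\n", MIN(22, virt32 - page_bits));
        return 0;
    }
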
typedef struct CPUTLBEntry {
/* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
MemTxAttrs attrs;
} CPUIOTLBEntry;
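
Since the fast path turns a TLB index into a byte offset with a single shift by CPU_TLB_ENTRY_BITS, the entry size must match that constant exactly. Upstream enforces the invariant with a build-time assertion along these lines (shown here for illustration; it must appear after CPUTLBEntry is defined):

    QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
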
-#define CPU_COMMON_TLB \
- /* The meaning of the MMU modes is defined in the target code. */ \
- /* tlb_lock serializes updates to tlb_table and tlb_v_table */ \
- QemuSpin tlb_lock; \
- CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \
- CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \
- CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \
- CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE]; \
- size_t tlb_flush_count; \
- target_ulong tlb_flush_addr; \
- target_ulong tlb_flush_mask; \
- target_ulong vtlb_index; \
+/*
+ * Data elements that are per MMU mode, minus the bits accessed by
+ * the TCG fast path.
+ */
+typedef struct CPUTLBDesc {
+ /*
+ * Describe a region covering all of the large pages allocated
+ * into the tlb. When any page within this region is flushed,
+ * we must flush the entire tlb. The region is matched if
+ * (addr & large_page_mask) == large_page_addr.
+ */
+ target_ulong large_page_addr;
+ target_ulong large_page_mask;
+ /* host time (in ns) at the beginning of the time window */
+ int64_t window_begin_ns;
+ /* maximum number of entries observed in the window */
+ size_t window_max_entries;
+ size_t n_used_entries;
+ /* The next index to use in the tlb victim table. */
+ size_t vindex;
+ /* The tlb victim table, in two parts. */
+ CPUTLBEntry vtable[CPU_VTLB_SIZE];
+ CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
+ /* The iotlb. */
+ CPUIOTLBEntry *iotlb;
+} CPUTLBDesc;
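
The large_page_addr/large_page_mask pair implements the region test stated in the comment above. A minimal sketch of how a page-flush path might consult it (the helper name is hypothetical; the real logic lives in accel/tcg/cputlb.c):

    /* True if flushing 'addr' must flush the whole mmu_idx, because the
     * address falls inside the recorded large-page region. */
    static inline bool tlb_flush_covers_large_page(const CPUTLBDesc *desc,
                                                   target_ulong addr)
    {
        return (addr & desc->large_page_mask) == desc->large_page_addr;
    }
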
+
+/*
+ * Data elements that are per MMU mode, accessed by the fast path.
+ * The structure is aligned to aid loading the pair with one insn.
+ */
+typedef struct CPUTLBDescFast {
+ /* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
+ uintptr_t mask;
+ /* The array of tlb entries itself. */
+ CPUTLBEntry *table;
+} CPUTLBDescFast QEMU_ALIGNED(2 * sizeof(void *));
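
Because mask holds (n_entries - 1) << CPU_TLB_ENTRY_BITS, the fast path can find an entry with a shift and an AND, independent of the current TLB size. A sketch of the lookup, modeled on the tlb_index()/tlb_entry() helpers that consume this structure (the env_tlb() accessor is assumed from the surrounding headers):

    /* Index of the entry covering 'addr', in units of CPUTLBEntry. */
    static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
                                      target_ulong addr)
    {
        uintptr_t size_mask =
            env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
        return (addr >> TARGET_PAGE_BITS) & size_mask;
    }

    /* Pointer to the entry covering 'addr'. */
    static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
                                         target_ulong addr)
    {
        return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)];
    }
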
+
+/*
+ * Data elements that are shared between all MMU modes.
+ */
+typedef struct CPUTLBCommon {
+ /* Serialize updates to f.table and d.vtable, and others as noted. */
+ QemuSpin lock;
+ /*
+ * Within dirty, for each bit N, modifications have been made to
+ * mmu_idx N since the last time that mmu_idx was flushed.
+     * Protected by c.lock.
+ */
+ uint16_t dirty;
+ /*
+ * Statistics. These are not lock protected, but are read and
+ * written atomically. This allows the monitor to print a snapshot
+ * of the stats without interfering with the cpu.
+ */
+ size_t full_flush_count;
+ size_t part_flush_count;
+ size_t elide_flush_count;
+} CPUTLBCommon;
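
The dirty bitmap is what allows redundant flushes to be elided: a flush of mmu_idx N can be skipped when bit N is clear. A hedged sketch of the pattern (function names are hypothetical; the real code is in accel/tcg/cputlb.c):

    /* Record a modification to mmu_idx; the caller holds c.lock. */
    static inline void tlb_mark_dirty(CPUTLB *tlb, int mmu_idx)
    {
        tlb->c.dirty |= 1 << mmu_idx;
    }

    /* At flush time: a clear bit means the flush can be elided. */
    static inline bool tlb_flush_needed(CPUTLB *tlb, int mmu_idx)
    {
        return (tlb->c.dirty >> mmu_idx) & 1;
    }
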
+
+/*
+ * The entire softmmu tlb, for all MMU modes.
+ * The meaning of each of the MMU modes is defined in the target code.
+ * Since this is placed within CPUNegativeOffsetState, the smallest
+ * negative offsets are at the end of the struct.
+ */
+typedef struct CPUTLB {
+ CPUTLBCommon c;
+ CPUTLBDesc d[NB_MMU_MODES];
+ CPUTLBDescFast f[NB_MMU_MODES];
+} CPUTLB;
+
+/* This will be used by TCG backends to compute offsets. */
+#define TLB_MASK_TABLE_OFS(IDX) \
+ ((int)offsetof(ArchCPU, neg.tlb.f[IDX]) - (int)offsetof(ArchCPU, env))
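
Since neg sits immediately before env inside ArchCPU, the subtraction yields a small negative displacement from the env pointer, which backends can fold directly into a load. A sketch of the intended use (the emitter names and registers are placeholders, not a real TCG backend API):

    /* Hypothetical backend fragment: load the mask/table pair for mem_index.
     * The QEMU_ALIGNED(2 * sizeof(void *)) on CPUTLBDescFast means the pair
     * can often be fetched with a single load-pair instruction. */
    int mask_ofs  = TLB_MASK_TABLE_OFS(mem_index);
    int table_ofs = mask_ofs + (int)offsetof(CPUTLBDescFast, table);
    emit_load(s, REG_TMP0, REG_ENV, mask_ofs);   /* placeholder emitter */
    emit_load(s, REG_TMP1, REG_ENV, table_ofs);
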
#else
-#define CPU_COMMON_TLB
-#endif
+typedef struct CPUTLB { } CPUTLB;
+#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */
-#define CPU_COMMON \
-    /* soft mmu support */ \
-    CPU_COMMON_TLB \
+#define CPU_COMMON /* Nothing */
+/*
+ * This structure must be placed in ArchCPU immediately
+ * before CPUArchState, as a field named "neg".
+ */
+typedef struct CPUNegativeOffsetState {
+ CPUTLB tlb;
+ IcountDecr icount_decr;
+} CPUNegativeOffsetState;
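
For context, the intended embedding, sketched for a hypothetical target: the field order is the contract, since placing neg directly before env is what keeps tlb and icount_decr reachable at small negative offsets from the env pointer that TCG keeps in a register.

    /* Hypothetical target cpu.h */
    struct ArchCPU {
        /*< private >*/
        CPUState parent_obj;
        /*< public >*/
        CPUNegativeOffsetState neg;  /* must be immediately before env */
        CPUArchState env;
    };
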
#endif