#include "qemu/osdep.h"
#include "qapi/error.h"
#ifndef _WIN32
-#include <sys/mman.h>
#endif
#include "qemu/cutils.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#if defined(CONFIG_USER_ONLY)
-#include <qemu.h>
+#include "qemu.h"
#else /* !CONFIG_USER_ONLY */
#include "hw/hw.h"
#include "exec/memory.h"
#include "exec/ram_addr.h"
#include "exec/log.h"
+#include "migration/vmstate.h"
+
#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif
+#ifdef TARGET_PAGE_BITS_VARY
+int target_page_bits;
+bool target_page_bits_decided;
+#endif
+
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* Current CPU in the current thread. It is only valid inside
   cpu_exec(). */
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting. */
int use_icount;
+bool set_preferred_target_page_bits(int bits)
+{
+ /* The target page size is the lowest common denominator for all
+ * the CPUs in the system, so we can only make it smaller, never
+ * larger. And we can't make it smaller once we've committed to
+ * a particular size.
+ */
+#ifdef TARGET_PAGE_BITS_VARY
+ assert(bits >= TARGET_PAGE_BITS_MIN);
+ if (target_page_bits == 0 || target_page_bits > bits) {
+ if (target_page_bits_decided) {
+ return false;
+ }
+ target_page_bits = bits;
+ }
+#endif
+ return true;
+}
+
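For context, a CPU realize hook would negotiate the page size roughly like this (a minimal sketch; foo_cpu_realizefn and the choice of 12 bits are hypothetical, not part of this patch):

    static void foo_cpu_realizefn(CPUState *cs, Error **errp)
    {
        /* Ask for 4K pages; this only fails if the machine has already
         * committed to a page size and 4K would shrink it further.
         */
        if (!set_preferred_target_page_bits(12)) {
            error_setg(errp, "target page size has already been finalized");
            return;
        }
        /* ... remaining target-specific realize work ... */
    }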
#if !defined(CONFIG_USER_ONLY)
+static void finalize_target_page_bits(void)
+{
+#ifdef TARGET_PAGE_BITS_VARY
+ if (target_page_bits == 0) {
+ target_page_bits = TARGET_PAGE_BITS_MIN;
+ }
+ target_page_bits_decided = true;
+#endif
+}
+
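Consumers then reach the committed value through TARGET_PAGE_BITS. A sketch of what the header side can look like under TARGET_PAGE_BITS_VARY (assumed, since the header hunk is not shown here):

    #ifdef TARGET_PAGE_BITS_VARY
    extern int target_page_bits;
    extern bool target_page_bits_decided;
    /* Trip an assertion on any read made before the size is final. */
    #define TARGET_PAGE_BITS \
        ({ assert(target_page_bits_decided); target_page_bits; })
    #endif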
typedef struct subpage_t {
MemoryRegion iomem;
AddressSpace *as;
hwaddr base;
- uint16_t sub_section[TARGET_PAGE_SIZE];
+ uint16_t sub_section[];
} subpage_t;
#define PHYS_SECTION_UNASSIGNED 0
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
+ static unsigned alloc_hint = 16;
if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
- map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
+ alloc_hint = map->nodes_nb_alloc;
}
}
/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
* and update our entry so we can skip it and go directly to the destination.
*/
-static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
+static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
unsigned valid_ptr = P_L2_SIZE;
int valid = 0;
valid_ptr = i;
valid++;
if (p[i].skip) {
- phys_page_compact(&p[i], nodes, compacted);
+ phys_page_compact(&p[i], nodes);
}
}
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
- DECLARE_BITMAP(compacted, nodes_nb);
-
if (d->phys_map.skip) {
- phys_page_compact(&d->phys_map, d->map.nodes, compacted);
+ phys_page_compact(&d->phys_map, d->map.nodes);
}
}
/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
* the section must cover the entire address space.
*/
- return section->size.hi ||
+ return int128_gethi(section->size) ||
range_covers_byte(section->offset_within_address_space,
- section->size.lo, addr);
+ int128_getlo(section->size), addr);
}
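To see why checking the high half alone is enough: sections are clipped to [0, 2^64), so a size whose upper 64 bits are nonzero can only be 2^64 itself, i.e. the section spans the entire address space. A standalone model of the check, using unsigned __int128 as a stand-in for QEMU's Int128 (an assumption of this sketch):

    #include <stdbool.h>
    #include <stdint.h>

    static bool covers(unsigned __int128 size, uint64_t start, uint64_t addr)
    {
        if ((uint64_t)(size >> 64)) {
            return true;                 /* size >= 2^64: covers everything */
        }
        /* Equivalent of range_covers_byte(): clipping guarantees
         * start + size <= 2^64, so the subtraction cannot wrap past the end.
         */
        return addr - start < (uint64_t)size;
    }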
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
MemoryRegionSection *section;
- AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
+ AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
section = address_space_translate_internal(d, addr, xlat, plen, false);
}
#endif
-#ifndef CONFIG_USER_ONLY
-static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
-
-static int cpu_get_free_index(Error **errp)
+void cpu_exec_unrealizefn(CPUState *cpu)
{
- int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
-
- if (cpu >= MAX_CPUMASK_BITS) {
- error_setg(errp, "Trying to use more CPUs than max of %d",
- MAX_CPUMASK_BITS);
- return -1;
- }
+ CPUClass *cc = CPU_GET_CLASS(cpu);
- bitmap_set(cpu_index_map, cpu, 1);
- return cpu;
-}
+ cpu_list_remove(cpu);
-void cpu_exec_exit(CPUState *cpu)
-{
- if (cpu->cpu_index == -1) {
- /* cpu_index was never allocated by this @cpu or was already freed. */
- return;
+ if (cc->vmsd != NULL) {
+ vmstate_unregister(NULL, cc->vmsd, cpu);
}
-
- bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
- cpu->cpu_index = -1;
-}
-#else
-
-static int cpu_get_free_index(Error **errp)
-{
- CPUState *some_cpu;
- int cpu_index = 0;
-
- CPU_FOREACH(some_cpu) {
- cpu_index++;
+ if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
+ vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
}
- return cpu_index;
-}
-
-void cpu_exec_exit(CPUState *cpu)
-{
}
-#endif
-void cpu_exec_init(CPUState *cpu, Error **errp)
+void cpu_exec_initfn(CPUState *cpu)
{
- CPUClass *cc = CPU_GET_CLASS(cpu);
- Error *local_err = NULL;
-
cpu->as = NULL;
cpu->num_ases = 0;
cpu->memory = system_memory;
object_ref(OBJECT(cpu->memory));
#endif
+}
-#if defined(CONFIG_USER_ONLY)
- cpu_list_lock();
-#endif
- cpu->cpu_index = cpu_get_free_index(&local_err);
- if (local_err) {
- error_propagate(errp, local_err);
-#if defined(CONFIG_USER_ONLY)
- cpu_list_unlock();
-#endif
- return;
- }
-    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
-#if defined(CONFIG_USER_ONLY)
- (void) cc;
- cpu_list_unlock();
-#else
+void cpu_exec_realizefn(CPUState *cpu, Error **errp)
+{
+ CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
+
+ cpu_list_add(cpu);
+
+#ifndef CONFIG_USER_ONLY
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
}
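The realize/unrealize pair is intended to be driven from the CPU's DeviceClass hooks. A hypothetical target-side pairing (foo_cpu_realizefn/foo_cpu_unrealizefn are illustrative; only cpu_exec_realizefn and cpu_exec_unrealizefn come from this patch):

    static void foo_cpu_realizefn(DeviceState *dev, Error **errp)
    {
        CPUState *cs = CPU(dev);
        Error *local_err = NULL;

        cpu_exec_realizefn(cs, &local_err);  /* list insert + vmstate hookup */
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* ... target-specific setup, then chain to the parent realize ... */
    }

    static void foo_cpu_unrealizefn(DeviceState *dev, Error **errp)
    {
        cpu_exec_unrealizefn(CPU(dev));      /* undoes the above */
    }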
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
+ mmap_lock();
+ tb_lock();
tb_invalidate_phys_page_range(pc, pc + 1, 0);
+ tb_unlock();
+ mmap_unlock();
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
int asidx = cpu_asidx_from_attrs(cpu, attrs);
if (phys != -1) {
+ /* Locks grabbed by tb_invalidate_phys_addr */
tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
phys | (pc & ~TARGET_PAGE_MASK));
}
fprintf(stderr, "\n");
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
if (qemu_log_separate()) {
+ qemu_log_lock();
qemu_log("qemu: fatal: ");
qemu_log_vprintf(fmt, ap2);
qemu_log("\n");
log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
qemu_log_flush();
+ qemu_log_unlock();
qemu_log_close();
}
va_end(ap2);
}
#ifdef __linux__
+static int64_t get_file_size(int fd)
+{
+ int64_t size = lseek(fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ return size;
+}
+
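A hypothetical caller, to show the negative-errno convention (error_setg_errno is QEMU's stock helper; the wrapper below is illustrative):

    static bool check_backing_file_size(int fd, Error **errp)
    {
        int64_t file_size = get_file_size(fd);

        if (file_size < 0) {
            error_setg_errno(errp, (int)-file_size,
                             "could not determine size of backing file");
            return false;
        }
        return true;
    }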
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path,
char *filename;
char *sanitized_name;
char *c;
- void *area;
+ void *area = MAP_FAILED;
int fd = -1;
- int64_t page_size;
+ int64_t file_size;
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_setg(errp,
*/
}
- page_size = qemu_fd_getpagesize(fd);
- block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
+ block->page_size = qemu_fd_getpagesize(fd);
+ block->mr->align = block->page_size;
+#if defined(__s390x__)
+ if (kvm_enabled()) {
+ block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
+ }
+#endif
- if (memory < page_size) {
+ file_size = get_file_size(fd);
+
+ if (memory < block->page_size) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
- "or larger than page size 0x%" PRIx64,
- memory, page_size);
+ "or larger than page size 0x%zx",
+ memory, block->page_size);
goto error;
}
- memory = ROUND_UP(memory, page_size);
+ if (file_size > 0 && file_size < memory) {
+ error_setg(errp, "backing store %s size 0x%" PRIx64
+ " does not match 'size' option 0x" RAM_ADDR_FMT,
+ path, file_size, memory);
+ goto error;
+ }
+
+ memory = ROUND_UP(memory, block->page_size);
/*
* ftruncate is not supported by hugetlbfs in older
* hosts, so don't bother bailing out on errors.
* If anything goes wrong with it under other filesystems,
* mmap will fail.
+ *
+ * Do not truncate the non-empty backend file to avoid corrupting
+ * the existing data in the file. Disabling shrinking is not
+ * enough. For example, the current vNVDIMM implementation stores
+ * the guest NVDIMM labels at the end of the backend file. If the
+ * backend file is later extended, QEMU will not be able to find
+ * those labels. Therefore, extending the non-empty backend file
+ * is disabled as well.
*/
- if (ftruncate(fd, memory)) {
+ if (!file_size && ftruncate(fd, memory)) {
perror("ftruncate");
}
}
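Concretely: a pre-sized backing file (for example one that carries vNVDIMM label data at its tail) reports a nonzero file_size, so the ftruncate() above is skipped and the file keeps its exact length; only a freshly created, zero-length file is grown to the requested size. The decision, compressed into a hypothetical helper for illustration:

    /* Grow only brand-new backing files; never resize existing data. */
    static bool backing_file_needs_truncate(int64_t file_size)
    {
        return file_size == 0;
    }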
if (mem_prealloc) {
- os_mem_prealloc(fd, area, memory);
+ os_mem_prealloc(fd, area, memory, errp);
+ if (errp && *errp) {
+ goto error;
+ }
}
block->fd = fd;
return area;
error:
+ if (area != MAP_FAILED) {
+ qemu_ram_munmap(area, memory);
+ }
if (unlink_on_error) {
unlink(path);
}
}
}
+size_t qemu_ram_pagesize(RAMBlock *rb)
+{
+ return rb->page_size;
+}
+
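A sketch of the sort of caller this accessor enables (hypothetical helper; migration/postcopy-style code needs exactly this distinction, since file-backed blocks may use pages larger than the host's base page size):

    #include <unistd.h>

    static bool ramblock_is_hugepage_backed(RAMBlock *rb)
    {
        return qemu_ram_pagesize(rb) > (size_t)getpagesize();
    }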
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
if (new_block->host) {
qemu_ram_setup_dump(new_block->host, new_block->max_length);
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
+ /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
- if (kvm_enabled()) {
- kvm_setup_guest_memory(new_block->host, new_block->max_length);
- }
}
}
new_block->max_length = max_size;
assert(max_size >= size);
new_block->fd = -1;
+ new_block->page_size = getpagesize();
new_block->host = host;
if (host) {
new_block->flags |= RAM_PREALLOC;
ram_addr = xen_ram_addr_from_mapcache(ptr);
block = qemu_get_ram_block(ram_addr);
if (block) {
- *offset = (host - block->host);
+ *offset = ram_addr - block->offset;
}
rcu_read_unlock();
return block;
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
uint64_t val, unsigned size)
{
+ bool locked = false;
+
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+ locked = true;
+ tb_lock();
tb_invalidate_phys_page_fast(ram_addr, size);
}
switch (size) {
default:
abort();
}
+
+ if (locked) {
+ tb_unlock();
+ }
+
/* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
continue;
}
cpu->watchpoint_hit = wp;
+
+ /* The tb_lock will be reset when cpu_loop_exit or
+ * cpu_loop_exit_noexc longjmp back into the cpu_exec
+ * main loop.
+ */
+ tb_lock();
tb_check_watchpoint(cpu);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
} else {
cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
- cpu_resume_from_signal(cpu, NULL);
+ cpu_loop_exit_noexc(cpu);
}
}
} else {
{
subpage_t *mmio;
- mmio = g_malloc0(sizeof(subpage_t));
-
+ mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
mmio->as = as;
mmio->base = base;
memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
* may have split the RCU critical section.
*/
d = atomic_rcu_read(&cpuas->as->dispatch);
- cpuas->memory_dispatch = d;
+ atomic_rcu_set(&cpuas->memory_dispatch, d);
tlb_flush(cpuas->cpu, 1);
}
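This store pairs with the atomic_rcu_read() added earlier in the patch: the writer publishes a fully built dispatch table, and a racing reader sees either the old or the new pointer, never a half-initialized one. A minimal standalone model of the pairing in C11 atomics (assuming atomic_rcu_set/atomic_rcu_read act as a release store and an acquire/consume load; this is not QEMU's implementation):

    #include <stdatomic.h>

    struct dispatch { int tables; /* ... lookup state ... */ };

    static _Atomic(struct dispatch *) current_dispatch;

    static void publish(struct dispatch *d)
    {
        /* Release: all initialization of *d is visible before the pointer. */
        atomic_store_explicit(&current_dispatch, d, memory_order_release);
    }

    static struct dispatch *lookup(void)
    {
        /* Acquire: dereferences through the result see the published data. */
        return atomic_load_explicit(&current_dispatch, memory_order_acquire);
    }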
cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
}
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
+ tb_lock();
tb_invalidate_phys_range(addr, addr + length);
+ tb_unlock();
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
}
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
void cpu_exec_init_all(void)
{
qemu_mutex_init(&ram_list.mutex);
+ /* The data structures we set up here depend on knowing the page size,
+ * so no more changes can be made after this point.
+ * In an ideal world, nothing we did before we had finished the
+ * machine setup would care about the target page size, and we could
+ * do this much later, rather than requiring board models to state
+ * up front what their requirements are.
+ */
+ finalize_target_page_bits();
io_mem_init();
memory_map_init();
qemu_mutex_init(&map_client_list_lock);