#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "sysemu/tcg.h"
+#include "sysemu/qtest.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "sysemu/hw_accel.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
-#include "trace-root.h"
+#include "trace/trace-root.h"
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
#include "monitor/monitor.h"
+#ifdef CONFIG_LIBDAXCTL
+#include <daxctl/libdaxctl.h>
+#endif
+
//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
static MemoryRegion io_mem_unassigned;
#endif
-CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
-
-/* current CPU in the current thread. It is only valid inside
- cpu_exec() */
-__thread CPUState *current_cpu;
-
uintptr_t qemu_host_page_size;
intptr_t qemu_host_page_mask;
}
};
-#endif
-
-CPUState *qemu_get_cpu(int index)
-{
- CPUState *cpu;
-
- CPU_FOREACH(cpu) {
- if (cpu->cpu_index == index) {
- return cpu;
- }
- }
-
- return NULL;
-}
-
-#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, int asidx,
const char *prefix, MemoryRegion *mr)
{
{
CPUClass *cc = CPU_GET_CLASS(cpu);
+ tlb_destroy(cpu);
cpu_list_remove(cpu);
if (cc->vmsd != NULL) {
DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION,
MemoryRegion *),
#endif
+ DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false),
DEFINE_PROP_END_OF_LIST(),
};
qemu_plugin_vcpu_init_hook(cpu);
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_USER_ONLY
+ assert(cc->vmsd == NULL);
+#else /* !CONFIG_USER_ONLY */
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
}
int flags, CPUWatchpoint **watchpoint)
{
CPUWatchpoint *wp;
+ vaddr in_page;
/* forbid ranges which are empty or run off the end of the address space */
if (len == 0 || (addr + len - 1) < addr) {
QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
}
- tlb_flush_page(cpu, addr);
+ in_page = -(addr | TARGET_PAGE_MASK);
+ if (len <= in_page) {
+ tlb_flush_page(cpu, addr);
+ } else {
+ tlb_flush(cpu);
+ }
if (watchpoint)
*watchpoint = wp;
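/*
 * Editorial sketch (not part of the patch): why -(addr | TARGET_PAGE_MASK)
 * is the number of bytes from addr to the end of its target page, assuming
 * the usual TARGET_PAGE_MASK == ~(TARGET_PAGE_SIZE - 1) definition.
 */
#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000ULL                 /* assume 4 KiB target pages */
#define PAGE_MASK (~(PAGE_SIZE - 1))        /* ...fffffffffffff000 */

static uint64_t bytes_left_in_page(uint64_t addr)
{
    /* addr | PAGE_MASK sets every bit above the page offset, so negating
     * the result (mod 2^64) yields PAGE_SIZE - page_offset(addr). */
    return -(addr | PAGE_MASK);
}

int main(void)
{
    assert(bytes_left_in_page(0x1234) == 0x1000 - 0x234);
    assert(bytes_left_in_page(0x2000) == PAGE_SIZE); /* aligned: whole page */
    /* A watchpoint of length <= bytes_left_in_page(addr) fits in a single
     * page, so flushing that one TLB page suffices; otherwise flush all. */
    return 0;
}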
int ret = 0;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- if (watchpoint_address_matches(wp, addr, TARGET_PAGE_SIZE)) {
+ if (watchpoint_address_matches(wp, addr, len)) {
ret |= wp->flags;
}
}
unsigned client)
{
DirtyMemoryBlocks *blocks;
- unsigned long end, page;
+ unsigned long end, page, start_page;
bool dirty = false;
RAMBlock *ramblock;
uint64_t mr_offset, mr_size;
}
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
- page = start >> TARGET_PAGE_BITS;
+ start_page = start >> TARGET_PAGE_BITS;
+ page = start_page;
WITH_RCU_READ_LOCK_GUARD() {
blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
page += num;
}
- mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset;
- mr_size = (end - page) << TARGET_PAGE_BITS;
+ mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
+ mr_size = (end - start_page) << TARGET_PAGE_BITS;
memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
}
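/*
 * Editorial note: the loop above advances "page" up to "end", so computing
 * mr_offset/mr_size from "page" afterwards described an empty range at the
 * end of the region and the dirty bitmap clear was effectively skipped.
 * Snapshotting start_page keeps the clear covering [start, start + length).
 */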
long qemu_minrampagesize(void)
{
long hpsize = LONG_MAX;
- long mainrampagesize;
- Object *memdev_root;
- MachineState *ms = MACHINE(qdev_get_machine());
-
- mainrampagesize = qemu_mempath_getpagesize(mem_path);
-
- /* it's possible we have memory-backend objects with
- * hugepage-backed RAM. these may get mapped into system
- * address space via -numa parameters or memory hotplug
- * hooks. we want to take these into account, but we
- * also want to make sure these supported hugepage
- * sizes are applicable across the entire range of memory
- * we may boot from, so we take the min across all
- * backends, and assume normal pages in cases where a
- * backend isn't backed by hugepages.
- */
- memdev_root = object_resolve_path("/objects", NULL);
- if (memdev_root) {
- object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
- }
- if (hpsize == LONG_MAX) {
- /* No additional memory regions found ==> Report main RAM page size */
- return mainrampagesize;
- }
-
- /* If NUMA is disabled or the NUMA nodes are not backed with a
- * memory-backend, then there is at least one node using "normal" RAM,
- * so if its page size is smaller we have got to report that size instead.
- */
- if (hpsize > mainrampagesize &&
- (ms->numa_state == NULL ||
- ms->numa_state->num_nodes == 0 ||
- ms->numa_state->nodes[0].node_memdev == NULL)) {
- static bool warned;
- if (!warned) {
- error_report("Huge page support disabled (n/a for main memory).");
- warned = true;
- }
- return mainrampagesize;
- }
+ Object *memdev_root = object_resolve_path("/objects", NULL);
+ object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
return hpsize;
}
long qemu_maxrampagesize(void)
{
- long pagesize = qemu_mempath_getpagesize(mem_path);
+ long pagesize = 0;
Object *memdev_root = object_resolve_path("/objects", NULL);
- if (memdev_root) {
- object_child_foreach(memdev_root, find_max_backend_pagesize,
- &pagesize);
- }
+ object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize);
return pagesize;
}
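/*
 * Editorial note: this simplification assumes main RAM is itself created as
 * a memory backend under /objects, so walking the backends covers all RAM
 * and the separate mem_path/mainrampagesize handling becomes dead code.
 * qemu_maxrampagesize() consequently reports 0 when no backend exists.
 */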
#else
return size;
}
+static int64_t get_file_align(int fd)
+{
+ int64_t align = -1;
+#if defined(__linux__) && defined(CONFIG_LIBDAXCTL)
+ struct stat st;
+
+ if (fstat(fd, &st) < 0) {
+ return -errno;
+ }
+
+ /* Special handling for devdax character devices */
+ if (S_ISCHR(st.st_mode)) {
+ g_autofree char *path = NULL;
+ g_autofree char *rpath = NULL;
+ struct daxctl_ctx *ctx;
+ struct daxctl_region *region;
+ int rc = 0;
+
+ path = g_strdup_printf("/sys/dev/char/%d:%d",
+ major(st.st_rdev), minor(st.st_rdev));
+        rpath = realpath(path, NULL);
+        if (!rpath) {
+            return -errno;
+        }
+
+ rc = daxctl_new(&ctx);
+ if (rc) {
+ return -1;
+ }
+
+ daxctl_region_foreach(ctx, region) {
+ if (strstr(rpath, daxctl_region_get_path(region))) {
+ align = daxctl_region_get_align(region);
+ break;
+ }
+ }
+ daxctl_unref(ctx);
+ }
+#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */
+
+ return align;
+}
+
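/*
 * Editorial sketch with made-up numbers: how get_file_align() resolves a
 * devdax node to its region. For a hypothetical /dev/dax0.0 whose st_rdev
 * is 252:9:
 *
 *   path  = "/sys/dev/char/252:9"
 *   rpath = "/sys/devices/platform/.../region0/dax0.0"   (via realpath)
 *
 * daxctl_region_get_path() of the matching region is a prefix of rpath, so
 * strstr() finds it and daxctl_region_get_align() reports the mapping
 * alignment (commonly 2 MiB for PMD-mapped device-DAX).
 */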
static int file_ram_open(const char *path,
const char *region_name,
bool *created,
bool truncate,
Error **errp)
{
- Error *err = NULL;
- MachineState *ms = MACHINE(qdev_get_machine());
void *area;
block->page_size = qemu_fd_getpagesize(fd);
return NULL;
}
- if (mem_prealloc) {
- os_mem_prealloc(fd, area, memory, ms->smp.cpus, &err);
- if (err) {
- error_propagate(errp, err);
- qemu_ram_munmap(fd, area, memory);
- return NULL;
- }
- }
-
block->fd = fd;
return area;
}
*/
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
{
+ const ram_addr_t unaligned_size = newsize;
+
assert(block);
newsize = HOST_PAGE_ALIGN(newsize);
if (block->used_length == newsize) {
+ /*
+ * We don't have to resize the ram block (which only knows aligned
+     * sizes); however, we have to notify if the unaligned size changed.
+ */
+ if (unaligned_size != memory_region_size(block->mr)) {
+ memory_region_set_size(block->mr, unaligned_size);
+ if (block->resized) {
+ block->resized(block->idstr, unaligned_size, block->host);
+ }
+ }
return 0;
}
block->used_length = newsize;
cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
DIRTY_CLIENTS_ALL);
- memory_region_set_size(block->mr, newsize);
+ memory_region_set_size(block->mr, unaligned_size);
if (block->resized) {
- block->resized(block->idstr, newsize, block->host);
+ block->resized(block->idstr, unaligned_size, block->host);
}
return 0;
}
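/*
 * Editorial sketch (a simplified stand-in, not QEMU code): the RAMBlock
 * only tracks host-page-aligned sizes, while the MemoryRegion now tracks
 * the exact requested size. A toy model with 4 KiB host pages:
 */
#include <inttypes.h>
#include <stdio.h>

#define HOST_PAGE_SIZE 4096ULL
#define ALIGN_UP(x) (((x) + HOST_PAGE_SIZE - 1) & ~(HOST_PAGE_SIZE - 1))

static uint64_t used_length;    /* RAMBlock: always host-page aligned */
static uint64_t mr_size;        /* MemoryRegion: may be unaligned */

static void toy_resize(uint64_t newsize)
{
    uint64_t unaligned_size = newsize;

    newsize = ALIGN_UP(newsize);
    if (used_length == newsize) {
        /* Aligned size unchanged, but the unaligned size may still move. */
        if (unaligned_size != mr_size) {
            mr_size = unaligned_size;
            printf("notify only: mr_size=%" PRIu64 "\n", mr_size);
        }
        return;
    }
    used_length = newsize;
    mr_size = unaligned_size;
    printf("resize: used_length=%" PRIu64 " mr_size=%" PRIu64 "\n",
           used_length, mr_size);
}

int main(void)
{
    toy_resize(5000);   /* resize: used_length=8192 mr_size=5000 */
    toy_resize(6000);   /* same aligned size -> notify only: mr_size=6000 */
    return 0;
}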
* Otherwise no-op.
* @Note: this is supposed to be a synchronous op.
*/
-void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length)
+void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
{
- void *addr = ramblock_ptr(block, start);
-
    /* The requested range should fit within the block range */
g_assert((start + length) <= block->used_length);
#ifdef CONFIG_LIBPMEM
/* The lack of support for pmem should not block the sync */
if (ramblock_is_pmem(block)) {
+ void *addr = ramblock_ptr(block, start);
pmem_persist(addr, length);
return;
}
* specified as persistent (or is not one) - use the msync.
* Less optimal but still achieves the same goal
*/
+ void *addr = ramblock_ptr(block, start);
if (qemu_msync(addr, length, block->fd)) {
warn_report("%s: failed to sync memory range: start: "
RAM_ADDR_FMT " length: " RAM_ADDR_FMT,
if (new_block->host) {
qemu_ram_setup_dump(new_block->host, new_block->max_length);
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
- /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
- qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
+ /*
+     * MADV_DONTFORK is also needed by KVM in the absence of a synchronous
+     * MMU. Configure it unless the machine is a qtest server, in which case
+     * KVM is not used and it may be forked (e.g. for fuzzing purposes).
+ */
+ if (!qtest_enabled()) {
+ qemu_madvise(new_block->host, new_block->max_length,
+ QEMU_MADV_DONTFORK);
+ }
ram_block_notify_add(new_block->host, new_block->max_length);
}
}
{
RAMBlock *new_block;
Error *local_err = NULL;
- int64_t file_size;
+ int64_t file_size, file_align;
    /* Just support these ram flags for now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
size = HOST_PAGE_ALIGN(size);
file_size = get_file_size(fd);
if (file_size > 0 && file_size < size) {
- error_setg(errp, "backing store %s size 0x%" PRIx64
+ error_setg(errp, "backing store size 0x%" PRIx64
" does not match 'size' option 0x" RAM_ADDR_FMT,
- mem_path, file_size, size);
+ file_size, size);
+ return NULL;
+ }
+
+ file_align = get_file_align(fd);
+ if (file_align > 0 && mr && file_align > mr->align) {
+ error_setg(errp, "backing store align 0x%" PRIx64
+ " is larger than 'align' option 0x%" PRIx64,
+ file_align, mr->align);
return NULL;
}
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
- void *ptr, target_ulong len, int is_write)
+ void *ptr, target_ulong len, bool is_write)
{
int flags;
target_ulong l, page;
}
void cpu_physical_memory_rw(hwaddr addr, void *buf,
- hwaddr len, int is_write)
+ hwaddr len, bool is_write)
{
address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
buf, len, is_write);
if (!memory_access_is_direct(mr, is_write)) {
if (atomic_xchg(&bounce.in_use, true)) {
+ *plen = 0;
return NULL;
}
/* Avoid unbounded allocations */
void *cpu_physical_memory_map(hwaddr addr,
hwaddr *plen,
- int is_write)
+ bool is_write)
{
return address_space_map(&address_space_memory, addr, plen, is_write,
MEMTXATTRS_UNSPECIFIED);
}
void cpu_physical_memory_unmap(void *buffer, hwaddr len,
- int is_write, hwaddr access_len)
+ bool is_write, hwaddr access_len)
{
return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}
#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__)
#define RCU_READ_LOCK(...) rcu_read_lock()
#define RCU_READ_UNLOCK(...) rcu_read_unlock()
-#include "memory_ldst.inc.c"
+#include "memory_ldst.c.inc"
int64_t address_space_cache_init(MemoryRegionCache *cache,
AddressSpace *as,
/* Called from RCU critical section. address_space_read_cached uses this
* out of line function when the target is an MMIO or IOMMU region.
*/
-void
+MemTxResult
address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr,
void *buf, hwaddr len)
{
l = len;
mr = address_space_translate_cached(cache, addr, &addr1, &l, false,
MEMTXATTRS_UNSPECIFIED);
- flatview_read_continue(cache->fv,
- addr, MEMTXATTRS_UNSPECIFIED, buf, len,
- addr1, l, mr);
+ return flatview_read_continue(cache->fv,
+ addr, MEMTXATTRS_UNSPECIFIED, buf, len,
+ addr1, l, mr);
}
/* Called from RCU critical section. address_space_write_cached uses this
* out of line function when the target is an MMIO or IOMMU region.
*/
-void
+MemTxResult
address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
const void *buf, hwaddr len)
{
l = len;
mr = address_space_translate_cached(cache, addr, &addr1, &l, true,
MEMTXATTRS_UNSPECIFIED);
- flatview_write_continue(cache->fv,
- addr, MEMTXATTRS_UNSPECIFIED, buf, len,
- addr1, l, mr);
+ return flatview_write_continue(cache->fv,
+ addr, MEMTXATTRS_UNSPECIFIED, buf, len,
+ addr1, l, mr);
}
#define ARG1_DECL MemoryRegionCache *cache
#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
#define RCU_READ_LOCK() ((void)0)
#define RCU_READ_UNLOCK() ((void)0)
-#include "memory_ldst.inc.c"
+#include "memory_ldst.c.inc"
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
- void *ptr, target_ulong len, int is_write)
+ void *ptr, target_ulong len, bool is_write)
{
hwaddr phys_addr;
target_ulong l, page;
while (len > 0) {
int asidx;
MemTxAttrs attrs;
+ MemTxResult res;
page = addr & TARGET_PAGE_MASK;
phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
l = len;
phys_addr += (addr & ~TARGET_PAGE_MASK);
if (is_write) {
- address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
- attrs, buf, l);
+ res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
+ attrs, buf, l);
} else {
- address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, attrs, buf,
- l, false);
+ res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
+ attrs, buf, l);
+ }
+ if (res != MEMTX_OK) {
+ return -1;
}
len -= l;
buf += l;
}
}
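/*
 * Editorial sketch: with MemTxResult now propagated, a caller such as the
 * gdbstub can distinguish a failed debug access from stale buffer contents.
 * debug_read_byte is a hypothetical helper, not in-tree code:
 */
static bool debug_read_byte(CPUState *cpu, target_ulong addr, uint8_t *out)
{
    /* Returns false on a MEMTX failure instead of reporting garbage. */
    return cpu_memory_rw_debug(cpu, addr, out, 1, false) == 0;
}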
+/*
+ * If positive, discarding RAM is disabled. If negative, discarding RAM is
+ * required to work and cannot be disabled.
+ */
+static int ram_block_discard_disabled;
+
+int ram_block_discard_disable(bool state)
+{
+ int old;
+
+ if (!state) {
+ atomic_dec(&ram_block_discard_disabled);
+ return 0;
+ }
+
+ do {
+ old = atomic_read(&ram_block_discard_disabled);
+ if (old < 0) {
+ return -EBUSY;
+ }
+ } while (atomic_cmpxchg(&ram_block_discard_disabled, old, old + 1) != old);
+ return 0;
+}
+
+int ram_block_discard_require(bool state)
+{
+ int old;
+
+ if (!state) {
+ atomic_inc(&ram_block_discard_disabled);
+ return 0;
+ }
+
+ do {
+ old = atomic_read(&ram_block_discard_disabled);
+ if (old > 0) {
+ return -EBUSY;
+ }
+ } while (atomic_cmpxchg(&ram_block_discard_disabled, old, old - 1) != old);
+ return 0;
+}
+
+bool ram_block_discard_is_disabled(void)
+{
+ return atomic_read(&ram_block_discard_disabled) > 0;
+}
+
+bool ram_block_discard_is_required(void)
+{
+ return atomic_read(&ram_block_discard_disabled) < 0;
+}
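/*
 * Editorial sketch: the signed counter arbitrates two incompatible classes
 * of users. The function names below are hypothetical; in-tree, device
 * assignment (which pins all guest RAM) disables discards, while a memory
 * device that unplugs by discarding requires them.
 */
static int pinning_device_realize(void)
{
    /* Fails with -EBUSY if some user already requires RAM discards. */
    return ram_block_discard_disable(true);
}

static int discarding_device_realize(void)
{
    /* Fails with -EBUSY if some user already disabled RAM discards. */
    return ram_block_discard_require(true);
}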
+
#endif