* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
+#include "qapi/error.h"
#ifndef _WIN32
-#include <sys/mman.h>
#endif
-#include "qemu-common.h"
+#include "qemu/cutils.h"
#include "cpu.h"
+#include "exec/exec-all.h"
#include "tcg.h"
-#include "hw/hw.h"
+#include "hw/qdev-core.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
+#include "hw/xen/xen.h"
#endif
-#include "hw/qdev.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
-#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#if defined(CONFIG_USER_ONLY)
+#include "qemu.h"
+#else /* !CONFIG_USER_ONLY */
+#include "hw/hw.h"
#include "exec/memory.h"
+#include "exec/ioport.h"
#include "sysemu/dma.h"
+#include "sysemu/numa.h"
+#include "sysemu/hw_accel.h"
#include "exec/address-spaces.h"
-#if defined(CONFIG_USER_ONLY)
-#include <qemu.h>
-#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
-#include "trace.h"
+#include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "exec/ram_addr.h"
#include "exec/log.h"
+#include "migration/vmstate.h"
+
#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif
+#ifdef TARGET_PAGE_BITS_VARY
+int target_page_bits;
+bool target_page_bits_decided;
+#endif
+
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
cpu_exec() */
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting. */
int use_icount;
+bool set_preferred_target_page_bits(int bits)
+{
+ /* The target page size is the lowest common denominator for all
+ * the CPUs in the system, so we can only make it smaller, never
+ * larger. And we can't make it smaller once we've committed to
+ * a particular size.
+ */
+#ifdef TARGET_PAGE_BITS_VARY
+ assert(bits >= TARGET_PAGE_BITS_MIN);
+ if (target_page_bits == 0 || target_page_bits > bits) {
+ if (target_page_bits_decided) {
+ return false;
+ }
+ target_page_bits = bits;
+ }
+#endif
+ return true;
+}
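/* Editor's sketch, not part of the original patch: how a target built
 * with TARGET_PAGE_BITS_VARY is expected to use the helper above.  A
 * hypothetical CPU realize hook may only shrink the target page size,
 * and only until the first translation commits it.
 */
static void example_cpu_set_page_size(Error **errp)
{
    if (!set_preferred_target_page_bits(12)) {      /* 4 KiB pages */
        error_setg(errp, "target page size already finalized");
    }
}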
+
#if !defined(CONFIG_USER_ONLY)
+static void finalize_target_page_bits(void)
+{
+#ifdef TARGET_PAGE_BITS_VARY
+ if (target_page_bits == 0) {
+ target_page_bits = TARGET_PAGE_BITS_MIN;
+ }
+ target_page_bits_decided = true;
+#endif
+}
+
typedef struct PhysPageEntry PhysPageEntry;
struct PhysPageEntry {
struct AddressSpaceDispatch {
struct rcu_head rcu;
+ MemoryRegionSection *mru_section;
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
MemoryRegion iomem;
AddressSpace *as;
hwaddr base;
- uint16_t sub_section[TARGET_PAGE_SIZE];
+ uint16_t sub_section[];
} subpage_t;
#define PHYS_SECTION_UNASSIGNED 0
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
+ static unsigned alloc_hint = 16;
if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
- map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
+ alloc_hint = map->nodes_nb_alloc;
}
}
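/* Editor's note: alloc_hint is deliberately static.  Dispatch maps are
 * rebuilt from scratch whenever the memory topology changes, so seeding
 * each rebuild with the previous build's peak allocation avoids redoing
 * the chain of doubling g_renew() calls every time.  The idiom, in
 * isolation (hypothetical names):
 *
 *     static unsigned hint = 16;
 *     if (nb + extra > alloc) {
 *         alloc = MAX(MAX(alloc, hint), nb + extra);
 *         nodes = g_renew(Node, nodes, alloc);
 *         hint = alloc;
 *     }
 */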
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
* and update our entry so we can skip it and go directly to the destination.
*/
-static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
+static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
{
unsigned valid_ptr = P_L2_SIZE;
int valid = 0;
valid_ptr = i;
valid++;
if (p[i].skip) {
- phys_page_compact(&p[i], nodes, compacted);
+ phys_page_compact(&p[i], nodes);
}
}
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
- DECLARE_BITMAP(compacted, nodes_nb);
-
if (d->phys_map.skip) {
- phys_page_compact(&d->phys_map, d->map.nodes, compacted);
+ phys_page_compact(&d->phys_map, d->map.nodes);
}
}
+static inline bool section_covers_addr(const MemoryRegionSection *section,
+ hwaddr addr)
+{
+ /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
+ * the section must cover the entire address space.
+ */
+ return int128_gethi(section->size) ||
+ range_covers_byte(section->offset_within_address_space,
+ int128_getlo(section->size), addr);
+}
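/* Editor's note: int128_gethi(section->size) != 0 means the section is
 * at least 2^64 bytes long, i.e. it spans the whole clipped address
 * space, so any address is covered without a range check.  For sizes
 * below 2^64 the test reduces to the usual half-open interval check:
 *
 *     offset <= addr && addr - offset < size
 *
 * which is what range_covers_byte() computes.
 */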
+
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
Node *nodes, MemoryRegionSection *sections)
{
lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
}
- if (sections[lp.ptr].size.hi ||
- range_covers_byte(sections[lp.ptr].offset_within_address_space,
- sections[lp.ptr].size.lo, addr)) {
+ if (section_covers_addr(&sections[lp.ptr], addr)) {
return &sections[lp.ptr];
} else {
return &sections[PHYS_SECTION_UNASSIGNED];
hwaddr addr,
bool resolve_subpage)
{
- MemoryRegionSection *section;
+ MemoryRegionSection *section = atomic_read(&d->mru_section);
subpage_t *subpage;
+ bool update;
- section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
+ if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
+ section_covers_addr(section, addr)) {
+ update = false;
+ } else {
+ section = phys_page_find(d->phys_map, addr, d->map.nodes,
+ d->map.sections);
+ update = true;
+ }
if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem);
section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
}
+ if (update) {
+ atomic_set(&d->mru_section, section);
+ }
return section;
}
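/* Editor's sketch, not part of the original patch: the lookup above is
 * a lock-free one-entry MRU cache.  RCU readers may observe a stale
 * section pointer, which is safe because it is re-validated with
 * section_covers_addr() before use; the unassigned section is never
 * trusted from the cache since it covers every address and would pin
 * the hint forever.  The bare pattern:
 */
static MemoryRegionSection *example_mru_lookup(AddressSpaceDispatch *d,
                                               hwaddr addr)
{
    MemoryRegionSection *s = atomic_read(&d->mru_section);

    if (!s || s == &d->map.sections[PHYS_SECTION_UNASSIGNED] ||
        !section_covers_addr(s, addr)) {
        s = phys_page_find(d->phys_map, addr, d->map.nodes,
                           d->map.sections);
        atomic_set(&d->mru_section, s);
    }
    return s;
}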
return section;
}
+/* Called from RCU critical section */
+IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
+ bool is_write)
+{
+ IOMMUTLBEntry iotlb = {0};
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
+
+ for (;;) {
+ AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
+ section = address_space_lookup_region(d, addr, false);
+ addr = addr - section->offset_within_address_space
+ + section->offset_within_region;
+ mr = section->mr;
+
+ if (!mr->iommu_ops) {
+ break;
+ }
+
+ iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+ if (!(iotlb.perm & (1 << is_write))) {
+ iotlb.target_as = NULL;
+ break;
+ }
+
+ addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+ | (addr & iotlb.addr_mask));
+ as = iotlb.target_as;
+ }
+
+ return iotlb;
+}
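/* Editor's sketch, not part of the original patch: the helper above
 * must run inside an RCU critical section.  target_as is non-NULL only
 * when at least one IOMMU was traversed and granted the access; it is
 * NULL both when no IOMMU sits on the path and when permission was
 * denied.
 */
static bool example_probe_iommu_mapping(AddressSpace *as, hwaddr addr)
{
    IOMMUTLBEntry entry;

    rcu_read_lock();
    entry = address_space_get_iotlb_entry(as, addr, false);
    rcu_read_unlock();

    return entry.target_as != NULL;
}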
+
/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
hwaddr *xlat, hwaddr *plen,
hwaddr *xlat, hwaddr *plen)
{
MemoryRegionSection *section;
- AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
+ AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
section = address_space_translate_internal(d, addr, xlat, plen, false);
/* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
version_id is increased. */
cpu->interrupt_request &= ~0x01;
- tlb_flush(cpu, 1);
+ tlb_flush(cpu);
return 0;
}
}
#endif
-#ifndef CONFIG_USER_ONLY
-static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
-
-static int cpu_get_free_index(Error **errp)
+void cpu_exec_unrealizefn(CPUState *cpu)
{
- int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
-
- if (cpu >= MAX_CPUMASK_BITS) {
- error_setg(errp, "Trying to use more CPUs than max of %d",
- MAX_CPUMASK_BITS);
- return -1;
- }
+ CPUClass *cc = CPU_GET_CLASS(cpu);
- bitmap_set(cpu_index_map, cpu, 1);
- return cpu;
-}
+ cpu_list_remove(cpu);
-void cpu_exec_exit(CPUState *cpu)
-{
- if (cpu->cpu_index == -1) {
- /* cpu_index was never allocated by this @cpu or was already freed. */
- return;
+ if (cc->vmsd != NULL) {
+ vmstate_unregister(NULL, cc->vmsd, cpu);
}
-
- bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
- cpu->cpu_index = -1;
-}
-#else
-
-static int cpu_get_free_index(Error **errp)
-{
- CPUState *some_cpu;
- int cpu_index = 0;
-
- CPU_FOREACH(some_cpu) {
- cpu_index++;
+ if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
+ vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
}
- return cpu_index;
-}
-
-void cpu_exec_exit(CPUState *cpu)
-{
}
-#endif
-void cpu_exec_init(CPUState *cpu, Error **errp)
+void cpu_exec_initfn(CPUState *cpu)
{
- CPUClass *cc = CPU_GET_CLASS(cpu);
- int cpu_index;
- Error *local_err = NULL;
-
cpu->as = NULL;
cpu->num_ases = 0;
cpu->memory = system_memory;
object_ref(OBJECT(cpu->memory));
#endif
+}
-#if defined(CONFIG_USER_ONLY)
- cpu_list_lock();
-#endif
- cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
- if (local_err) {
- error_propagate(errp, local_err);
-#if defined(CONFIG_USER_ONLY)
- cpu_list_unlock();
-#endif
- return;
- }
- QTAILQ_INSERT_TAIL(&cpus, cpu, node);
-#if defined(CONFIG_USER_ONLY)
- cpu_list_unlock();
-#endif
+void cpu_exec_realizefn(CPUState *cpu, Error **errp)
+{
+ CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
+
+ cpu_list_add(cpu);
+
+#ifndef CONFIG_USER_ONLY
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
- vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
+ vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
}
if (cc->vmsd != NULL) {
- vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
+ vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
}
+#endif
}
-#if defined(CONFIG_USER_ONLY)
-static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
-{
- tb_invalidate_phys_page_range(pc, pc + 1, 0);
-}
-#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
- MemTxAttrs attrs;
- hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
- int asidx = cpu_asidx_from_attrs(cpu, attrs);
- if (phys != -1) {
- tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
- phys | (pc & ~TARGET_PAGE_MASK));
- }
+ /* Flush the whole TB cache, as this will not have race
+ * conditions even if we don't have proper locking yet.
+ * Ideally we would just invalidate the TBs for the
+ * specified PC.
+ */
+ tb_flush(cpu);
}
-#endif
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
fprintf(stderr, "\n");
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
if (qemu_log_separate()) {
+ qemu_log_lock();
qemu_log("qemu: fatal: ");
qemu_log_vprintf(fmt, ap2);
qemu_log("\n");
log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
qemu_log_flush();
+ qemu_log_unlock();
qemu_log_close();
}
va_end(ap2);
if (memory_region_is_ram(section->mr)) {
/* Normal RAM. */
- iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
- + xlat;
+ iotlb = memory_region_get_ram_addr(section->mr) + xlat;
if (!section->readonly) {
iotlb |= PHYS_SECTION_NOTDIRTY;
} else {
}
#ifdef __linux__
+/*
+ * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
+ * may or may not name the same files / on the same filesystem now as
+ * when we actually open and map them. Iterate over the file
+ * descriptors instead, and use qemu_fd_getpagesize().
+ */
+static int find_max_supported_pagesize(Object *obj, void *opaque)
+{
+ char *mem_path;
+ long *hpsize_min = opaque;
-#include <sys/vfs.h>
+ if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+ mem_path = object_property_get_str(obj, "mem-path", NULL);
+ if (mem_path) {
+ long hpsize = qemu_mempath_getpagesize(mem_path);
+ if (hpsize < *hpsize_min) {
+ *hpsize_min = hpsize;
+ }
+ } else {
+ *hpsize_min = getpagesize();
+ }
+ }
-#define HUGETLBFS_MAGIC 0x958458f6
+ return 0;
+}
-static long gethugepagesize(const char *path, Error **errp)
+long qemu_getrampagesize(void)
{
- struct statfs fs;
- int ret;
+ long hpsize = LONG_MAX;
+ long mainrampagesize;
+ Object *memdev_root;
- do {
- ret = statfs(path, &fs);
- } while (ret != 0 && errno == EINTR);
+ if (mem_path) {
+ mainrampagesize = qemu_mempath_getpagesize(mem_path);
+ } else {
+ mainrampagesize = getpagesize();
+ }
+
+ /* It's possible we have memory-backend objects with
+ * hugepage-backed RAM. These may get mapped into system
+ * address space via -numa parameters or memory hotplug
+ * hooks. We want to take these into account, but we
+ * also want to make sure the supported hugepage sizes
+ * are applicable across the entire range of memory we
+ * may boot from, so we take the min across all backends,
+ * and assume normal pages where a backend isn't backed
+ * by hugepages.
+ */
+ memdev_root = object_resolve_path("/objects", NULL);
+ if (memdev_root) {
+ object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+ }
+ if (hpsize == LONG_MAX) {
+ /* No additional memory regions found ==> Report main RAM page size */
+ return mainrampagesize;
+ }
- if (ret != 0) {
- error_setg_errno(errp, errno, "failed to get page size of file %s",
- path);
- return 0;
+ /* If NUMA is disabled or the NUMA nodes are not backed with a
+ * memory-backend, then there is at least one node using "normal" RAM,
+ * so if its page size is smaller we have got to report that size instead.
+ */
+ if (hpsize > mainrampagesize &&
+ (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
+ static bool warned;
+ if (!warned) {
+ error_report("Huge page support disabled (n/a for main memory).");
+ warned = true;
+ }
+ return mainrampagesize;
}
- return fs.f_bsize;
+ return hpsize;
+}
+#else
+long qemu_getrampagesize(void)
+{
+ return getpagesize();
+}
+#endif
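/* Editor's sketch, not part of the original patch: a typical caller
 * uses the reported page size to size per-page bookkeeping
 * conservatively (hypothetical helper).
 */
static long example_pages_needed(uint64_t ram_bytes)
{
    long psize = qemu_getrampagesize();

    return (ram_bytes + psize - 1) / psize;     /* round up */
}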
+
+#ifdef __linux__
+static int64_t get_file_size(int fd)
+{
+ int64_t size = lseek(fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ return size;
}
static void *file_ram_alloc(RAMBlock *block,
const char *path,
Error **errp)
{
- struct stat st;
+ bool unlink_on_error = false;
char *filename;
char *sanitized_name;
char *c;
- void *area;
- int fd;
- uint64_t hpagesize;
- Error *local_err = NULL;
-
- hpagesize = gethugepagesize(path, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- goto error;
- }
- block->mr->align = hpagesize;
-
- if (memory < hpagesize) {
- error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
- "or larger than huge page size 0x%" PRIx64,
- memory, hpagesize);
- goto error;
- }
+ void *area = MAP_FAILED;
+ int fd = -1;
+ int64_t file_size;
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_setg(errp,
"host lacks kvm mmu notifiers, -mem-path unsupported");
- goto error;
+ return NULL;
}
- if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
- /* Make name safe to use with mkstemp by replacing '/' with '_'. */
- sanitized_name = g_strdup(memory_region_name(block->mr));
- for (c = sanitized_name; *c != '\0'; c++) {
- if (*c == '/') {
- *c = '_';
- }
+ for (;;) {
+ fd = open(path, O_RDWR);
+ if (fd >= 0) {
+ /* @path names an existing file, use it */
+ break;
}
+ if (errno == ENOENT) {
+ /* @path names a file that doesn't exist, create it */
+ fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
+ if (fd >= 0) {
+ unlink_on_error = true;
+ break;
+ }
+ } else if (errno == EISDIR) {
+ /* @path names a directory, create a file there */
+ /* Make name safe to use with mkstemp by replacing '/' with '_'. */
+ sanitized_name = g_strdup(memory_region_name(block->mr));
+ for (c = sanitized_name; *c != '\0'; c++) {
+ if (*c == '/') {
+ *c = '_';
+ }
+ }
- filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
- sanitized_name);
- g_free(sanitized_name);
+ filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
+ sanitized_name);
+ g_free(sanitized_name);
- fd = mkstemp(filename);
- if (fd >= 0) {
- unlink(filename);
+ fd = mkstemp(filename);
+ if (fd >= 0) {
+ unlink(filename);
+ g_free(filename);
+ break;
+ }
+ g_free(filename);
}
- g_free(filename);
- } else {
- fd = open(path, O_RDWR | O_CREAT, 0644);
+ if (errno != EEXIST && errno != EINTR) {
+ error_setg_errno(errp, errno,
+ "can't open backing store %s for guest RAM",
+ path);
+ goto error;
+ }
+ /*
+ * Try again on EINTR and EEXIST. The latter happens when
+ * something else creates the file between our two open() calls.
+ */
}
- if (fd < 0) {
- error_setg_errno(errp, errno,
- "unable to create backing store for hugepages");
+ block->page_size = qemu_fd_getpagesize(fd);
+ block->mr->align = block->page_size;
+#if defined(__s390x__)
+ if (kvm_enabled()) {
+ block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
+ }
+#endif
+
+ file_size = get_file_size(fd);
+
+ if (memory < block->page_size) {
+ error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
+ "or larger than page size 0x%zx",
+ memory, block->page_size);
+ goto error;
+ }
+
+ if (file_size > 0 && file_size < memory) {
+ error_setg(errp, "backing store %s size 0x%" PRIx64
+ " does not match 'size' option 0x" RAM_ADDR_FMT,
+ path, file_size, memory);
goto error;
}
- memory = ROUND_UP(memory, hpagesize);
+ memory = ROUND_UP(memory, block->page_size);
/*
* ftruncate is not supported by hugetlbfs in older
* hosts, so don't bother bailing out on errors.
* If anything goes wrong with it under other filesystems,
* mmap will fail.
+ *
+ * Do not truncate the non-empty backend file to avoid corrupting
+ * the existing data in the file. Disabling shrinking is not
+ * enough. For example, the current vNVDIMM implementation stores
+ * the guest NVDIMM labels at the end of the backend file. If the
+ * backend file is later extended, QEMU will not be able to find
+ * those labels. Therefore, extending the non-empty backend file
+ * is disabled as well.
*/
- if (ftruncate(fd, memory)) {
+ if (!file_size && ftruncate(fd, memory)) {
perror("ftruncate");
}
- area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
+ area = qemu_ram_mmap(fd, memory, block->mr->align,
+ block->flags & RAM_SHARED);
if (area == MAP_FAILED) {
error_setg_errno(errp, errno,
- "unable to map backing store for hugepages");
- close(fd);
+ "unable to map backing store for guest RAM");
goto error;
}
if (mem_prealloc) {
- os_mem_prealloc(fd, area, memory);
+ os_mem_prealloc(fd, area, memory, smp_cpus, errp);
+ if (errp && *errp) {
+ goto error;
+ }
}
block->fd = fd;
return area;
error:
+ if (area != MAP_FAILED) {
+ qemu_ram_munmap(area, memory);
+ }
+ if (unlink_on_error) {
+ unlink(path);
+ }
+ if (fd != -1) {
+ close(fd);
+ }
return NULL;
}
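/* Editor's sketch, not part of the original patch: the open() loop in
 * file_ram_alloc() condensed to its race-free create-or-open core.
 * EEXIST means we lost a creation race to another process; EINTR is a
 * plain retry.  Anything else is a real error.
 */
static int example_open_or_create(const char *path)
{
    int fd;

    for (;;) {
        fd = open(path, O_RDWR);
        if (fd >= 0) {
            return fd;                          /* existing file */
        }
        if (errno == ENOENT) {
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd >= 0) {
                return fd;                      /* we created it */
            }
        }
        if (errno != EEXIST && errno != EINTR) {
            return -1;                          /* real error */
        }
        /* Lost a create/unlink race, or interrupted: try again. */
    }
}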
#endif
}
}
-/* Called within an RCU critical section, or while the ramlist lock
- * is held.
- */
-static RAMBlock *find_ram_block(ram_addr_t addr)
+const char *qemu_ram_get_idstr(RAMBlock *rb)
{
- RAMBlock *block;
-
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- if (block->offset == addr) {
- return block;
- }
- }
-
- return NULL;
+ return rb->idstr;
}
-const char *qemu_ram_get_idstr(RAMBlock *rb)
+bool qemu_ram_is_shared(RAMBlock *rb)
{
- return rb->idstr;
+ return rb->flags & RAM_SHARED;
}
/* Called with iothread lock held. */
-void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
+void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
- RAMBlock *new_block, *block;
+ RAMBlock *block;
- rcu_read_lock();
- new_block = find_ram_block(addr);
assert(new_block);
assert(!new_block->idstr[0]);
}
pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+ rcu_read_lock();
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
+ if (block != new_block &&
+ !strcmp(block->idstr, new_block->idstr)) {
fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
new_block->idstr);
abort();
}
/* Called with iothread lock held. */
-void qemu_ram_unset_idstr(ram_addr_t addr)
+void qemu_ram_unset_idstr(RAMBlock *block)
{
- RAMBlock *block;
-
/* FIXME: arch_init.c assumes that this is not called throughout
* migration. Ignore the problem since hot-unplug during migration
* does not work anyway.
*/
-
- rcu_read_lock();
- block = find_ram_block(addr);
if (block) {
memset(block->idstr, 0, sizeof(block->idstr));
}
- rcu_read_unlock();
+}
+
+size_t qemu_ram_pagesize(RAMBlock *rb)
+{
+ return rb->page_size;
+}
+
+/* Returns the size of the largest page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+ RAMBlock *block;
+ size_t largest = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ largest = MAX(largest, qemu_ram_pagesize(block));
+ }
+
+ return largest;
}
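/* Editor's sketch, not part of the original patch: one intended use of
 * qemu_ram_pagesize_largest() is sizing a scratch buffer that can hold
 * any guest page, whatever backend it lives on (hypothetical helper).
 */
static void *example_alloc_any_page_buf(void)
{
    return g_malloc0(qemu_ram_pagesize_largest());
}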
static int memory_try_enable_merging(void *addr, size_t len)
* resize callback to update device state and/or add assertions to detect
* misuse, if necessary.
*/
-int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
+int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
{
- RAMBlock *block = find_ram_block(base);
-
assert(block);
newsize = HOST_PAGE_ALIGN(newsize);
if (err) {
error_propagate(errp, err);
qemu_mutex_unlock_ramlist();
+ return;
}
} else {
new_block->host = phys_mem_alloc(new_block->max_length,
"cannot set up guest memory '%s'",
memory_region_name(new_block->mr));
qemu_mutex_unlock_ramlist();
+ return;
}
memory_try_enable_merging(new_block->host, new_block->max_length);
}
if (new_block->host) {
qemu_ram_setup_dump(new_block->host, new_block->max_length);
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
+ /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
- if (kvm_enabled()) {
- kvm_setup_guest_memory(new_block->host, new_block->max_length);
- }
+ ram_block_notify_add(new_block->host, new_block->max_length);
}
}
new_block->max_length = max_size;
assert(max_size >= size);
new_block->fd = -1;
+ new_block->page_size = getpagesize();
new_block->host = host;
if (host) {
new_block->flags |= RAM_PREALLOC;
g_free(block);
}
-void qemu_ram_free(ram_addr_t addr)
+void qemu_ram_free(RAMBlock *block)
{
- RAMBlock *block;
+ if (!block) {
+ return;
+ }
- qemu_mutex_lock_ramlist();
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
- if (addr == block->offset) {
- QLIST_REMOVE_RCU(block, next);
- ram_list.mru_block = NULL;
- /* Write list before version */
- smp_wmb();
- ram_list.version++;
- call_rcu(block, reclaim_ramblock, rcu);
- break;
- }
+ if (block->host) {
+ ram_block_notify_remove(block->host, block->max_length);
}
+
+ qemu_mutex_lock_ramlist();
+ QLIST_REMOVE_RCU(block, next);
+ ram_list.mru_block = NULL;
+ /* Write list before version */
+ smp_wmb();
+ ram_list.version++;
+ call_rcu(block, reclaim_ramblock, rcu);
qemu_mutex_unlock_ramlist();
}
}
#endif /* !_WIN32 */
-int qemu_get_ram_fd(ram_addr_t addr)
-{
- RAMBlock *block;
- int fd;
-
- rcu_read_lock();
- block = qemu_get_ram_block(addr);
- fd = block->fd;
- rcu_read_unlock();
- return fd;
-}
-
-void qemu_set_ram_fd(ram_addr_t addr, int fd)
-{
- RAMBlock *block;
-
- rcu_read_lock();
- block = qemu_get_ram_block(addr);
- block->fd = fd;
- rcu_read_unlock();
-}
-
-void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
-{
- RAMBlock *block;
- void *ptr;
-
- rcu_read_lock();
- block = qemu_get_ram_block(addr);
- ptr = ramblock_ptr(block, 0);
- rcu_read_unlock();
- return ptr;
-}
-
/* Return a host pointer to ram allocated with qemu_ram_alloc.
* This should not be used for general purpose DMA. Use address_space_map
* or address_space_rw instead. For local memory (e.g. video ram) that the
*
* Called within RCU critical section.
*/
-void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
+void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
RAMBlock *block = ram_block;
if (block == NULL) {
block = qemu_get_ram_block(addr);
+ addr -= block->offset;
}
if (xen_enabled() && block->host == NULL) {
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
- return ramblock_ptr(block, addr - block->offset);
+ return ramblock_ptr(block, addr);
}
-/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
+/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
* but takes a size argument.
*
* Called within RCU critical section.
hwaddr *size)
{
RAMBlock *block = ram_block;
- ram_addr_t offset_inside_block;
if (*size == 0) {
return NULL;
}
if (block == NULL) {
block = qemu_get_ram_block(addr);
+ addr -= block->offset;
}
- offset_inside_block = addr - block->offset;
- *size = MIN(*size, block->max_length - offset_inside_block);
+ *size = MIN(*size, block->max_length - addr);
if (xen_enabled() && block->host == NULL) {
/* We need to check if the requested address is in the RAM
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
- return ramblock_ptr(block, offset_inside_block);
+ return ramblock_ptr(block, addr);
}
/*
* ram_addr_t.
*/
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
- ram_addr_t *ram_addr,
ram_addr_t *offset)
{
RAMBlock *block;
uint8_t *host = ptr;
if (xen_enabled()) {
+ ram_addr_t ram_addr;
rcu_read_lock();
- *ram_addr = xen_ram_addr_from_mapcache(ptr);
- block = qemu_get_ram_block(*ram_addr);
+ ram_addr = xen_ram_addr_from_mapcache(ptr);
+ block = qemu_get_ram_block(ram_addr);
if (block) {
- *offset = (host - block->host);
+ *offset = ram_addr - block->offset;
}
rcu_read_unlock();
return block;
if (round_offset) {
*offset &= TARGET_PAGE_MASK;
}
- *ram_addr = block->offset + *offset;
rcu_read_unlock();
return block;
}
/* Some of the softmmu routines need to translate from a host pointer
(typically a TLB entry) back to a ram offset. */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
RAMBlock *block;
- ram_addr_t offset; /* Not used */
-
- block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
+ ram_addr_t offset;
+ block = qemu_ram_block_from_host(ptr, false, &offset);
if (!block) {
- return NULL;
+ return RAM_ADDR_INVALID;
}
- return block->mr;
+ return block->offset + offset;
}
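/* Editor's sketch, not part of the original patch: with the new
 * signature, callers test the returned ram_addr_t against
 * RAM_ADDR_INVALID instead of checking the old MemoryRegion pointer
 * for NULL.
 */
static bool example_host_ptr_is_ram(void *ptr)
{
    return qemu_ram_addr_from_host(ptr) != RAM_ADDR_INVALID;
}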
/* Called within RCU critical section. */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
uint64_t val, unsigned size)
{
+ bool locked = false;
+
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+ locked = true;
+ tb_lock();
tb_invalidate_phys_page_fast(ram_addr, size);
}
switch (size) {
case 1:
- stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
+ stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
case 2:
- stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
+ stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
case 4:
- stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
+ stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
default:
abort();
}
+
+ if (locked) {
+ tb_unlock();
+ }
+
/* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
target_ulong pc, cs_base;
target_ulong vaddr;
CPUWatchpoint *wp;
- int cpu_flags;
+ uint32_t cpu_flags;
if (cpu->watchpoint_hit) {
/* We re-entered the check after replacing the TB. Now raise
return;
}
vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
+ vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (cpu_watchpoint_address_matches(wp, vaddr, len)
&& (wp->flags & flags)) {
continue;
}
cpu->watchpoint_hit = wp;
+
+ /* Both tb_lock and iothread_mutex will be reset when
+ * cpu_loop_exit or cpu_loop_exit_noexc longjmps
+ * back into the cpu_exec main loop.
+ */
+ tb_lock();
tb_check_watchpoint(cpu);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
} else {
cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
- cpu_resume_from_signal(cpu, NULL);
+ cpu_loop_exit_noexc(cpu);
}
}
} else {
{
subpage_t *mmio;
- mmio = g_malloc0(sizeof(subpage_t));
-
+ mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
mmio->as = as;
mmio->base = base;
memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
+
+ /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
+ * which can be called without the iothread mutex.
+ */
memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
NULL, UINT64_MAX);
+ memory_region_clear_global_locking(&io_mem_notdirty);
+
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
}
* may have split the RCU critical section.
*/
d = atomic_rcu_read(&cpuas->as->dispatch);
- cpuas->memory_dispatch = d;
- tlb_flush(cpuas->cpu, 1);
+ atomic_rcu_set(&cpuas->memory_dispatch, d);
+ tlb_flush(cpuas->cpu);
}
void address_space_init_dispatch(AddressSpace *as)
hwaddr length)
{
uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
+ addr += memory_region_get_ram_addr(mr);
+
/* No early return if dirty_log_mask is or becomes 0, because
* cpu_physical_memory_set_dirty_range will still call
* xen_modified_memory.
cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
}
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
+ tb_lock();
tb_invalidate_phys_range(addr, addr + length);
+ tb_unlock();
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
}
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
break;
case 4:
/* 32 bit write access */
- val = ldl_p(buf);
+ val = (uint32_t)ldl_p(buf);
result |= memory_region_dispatch_write(mr, addr1, val, 4,
attrs);
break;
abort();
}
} else {
- addr1 += memory_region_get_ram_addr(mr);
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(ptr, buf, l);
invalidate_and_set_dirty(mr, addr1, l);
}
}
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block, mr->ram_addr + addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, l);
}
memory_region_is_romd(mr))) {
l = memory_access_size(mr, l, addr1);
} else {
- addr1 += memory_region_get_ram_addr(mr);
/* ROM/RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
+ ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (type) {
case WRITE_DATA:
memcpy(ptr, buf, l);
void cpu_exec_init_all(void)
{
qemu_mutex_init(&ram_list.mutex);
+ /* The data structures we set up here depend on knowing the page size,
+ * so no more changes can be made after this point.
+ * In an ideal world, nothing we did before we had finished the
+ * machine setup would care about the target page size, and we could
+ * do this much later, rather than requiring board models to state
+ * up front what their requirements are.
+ */
+ finalize_target_page_bits();
io_mem_init();
memory_map_init();
qemu_mutex_init(&map_client_list_lock);
if (!memory_access_is_direct(mr, is_write)) {
l = memory_access_size(mr, l, addr);
if (!memory_region_access_valid(mr, xlat, l, is_write)) {
+ rcu_read_unlock();
return false;
}
}
return true;
}
+static hwaddr
+address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_len,
+ MemoryRegion *mr, hwaddr base, hwaddr len,
+ bool is_write)
+{
+ hwaddr done = 0;
+ hwaddr xlat;
+ MemoryRegion *this_mr;
+
+ for (;;) {
+ target_len -= len;
+ addr += len;
+ done += len;
+ if (target_len == 0) {
+ return done;
+ }
+
+ len = target_len;
+ this_mr = address_space_translate(as, addr, &xlat, &len, is_write);
+ if (this_mr != mr || xlat != base + done) {
+ return done;
+ }
+ }
+}
+
/* Map a physical memory region into a host virtual address.
* May map a subset of the requested range, given by and returned in *plen.
* May return NULL if resources needed to perform the mapping are exhausted.
bool is_write)
{
hwaddr len = *plen;
- hwaddr done = 0;
- hwaddr l, xlat, base;
- MemoryRegion *mr, *this_mr;
- ram_addr_t raddr;
+ hwaddr l, xlat;
+ MemoryRegion *mr;
void *ptr;
if (len == 0) {
return bounce.buffer;
}
- base = xlat;
- raddr = memory_region_get_ram_addr(mr);
-
- for (;;) {
- len -= l;
- addr += l;
- done += l;
- if (len == 0) {
- break;
- }
-
- l = len;
- this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
- if (this_mr != mr || xlat != base + done) {
- break;
- }
- }
memory_region_ref(mr);
- *plen = done;
- ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
+ *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
+ ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen);
rcu_read_unlock();
return ptr;
MemoryRegion *mr;
ram_addr_t addr1;
- mr = qemu_ram_addr_from_host(buffer, &addr1);
+ mr = memory_region_from_host(buffer, &addr1);
assert(mr != NULL);
if (is_write) {
invalidate_and_set_dirty(mr, addr1, access_len);
return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}
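/* Editor's sketch, not part of the original patch: the usual pairing of
 * address_space_map()/address_space_unmap().  *plen may come back
 * smaller than requested, and NULL means no mapping could be made.
 */
static bool example_read_mapped(AddressSpace *as, hwaddr addr,
                                hwaddr want)
{
    hwaddr len = want;
    void *buf = address_space_map(as, addr, &len, false);

    if (!buf) {
        return false;
    }
    /* ... consume up to len bytes at buf ... */
    address_space_unmap(as, buf, len, false, len);
    return true;
}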
-/* warning: addr must be aligned */
-static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs,
- MemTxResult *result,
- enum device_endian endian)
-{
- uint8_t *ptr;
- uint64_t val;
- MemoryRegion *mr;
- hwaddr l = 4;
- hwaddr addr1;
- MemTxResult r;
- bool release_lock = false;
-
- rcu_read_lock();
- mr = address_space_translate(as, addr, &addr1, &l, false);
- if (l < 4 || !memory_access_is_direct(mr, false)) {
- release_lock |= prepare_mmio_access(mr);
-
- /* I/O case */
- r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
-#if defined(TARGET_WORDS_BIGENDIAN)
- if (endian == DEVICE_LITTLE_ENDIAN) {
- val = bswap32(val);
- }
-#else
- if (endian == DEVICE_BIG_ENDIAN) {
- val = bswap32(val);
- }
-#endif
- } else {
- /* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- (memory_region_get_ram_addr(mr)
- & TARGET_PAGE_MASK)
- + addr1);
- switch (endian) {
- case DEVICE_LITTLE_ENDIAN:
- val = ldl_le_p(ptr);
- break;
- case DEVICE_BIG_ENDIAN:
- val = ldl_be_p(ptr);
- break;
- default:
- val = ldl_p(ptr);
- break;
- }
- r = MEMTX_OK;
- }
- if (result) {
- *result = r;
- }
- if (release_lock) {
- qemu_mutex_unlock_iothread();
- }
- rcu_read_unlock();
- return val;
-}
-
-uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_ldl_internal(as, addr, attrs, result,
- DEVICE_NATIVE_ENDIAN);
-}
-
-uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_ldl_internal(as, addr, attrs, result,
- DEVICE_LITTLE_ENDIAN);
-}
-
-uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_ldl_internal(as, addr, attrs, result,
- DEVICE_BIG_ENDIAN);
-}
-
-uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs,
- MemTxResult *result,
- enum device_endian endian)
-{
- uint8_t *ptr;
- uint64_t val;
- MemoryRegion *mr;
- hwaddr l = 8;
- hwaddr addr1;
- MemTxResult r;
- bool release_lock = false;
-
- rcu_read_lock();
- mr = address_space_translate(as, addr, &addr1, &l,
- false);
- if (l < 8 || !memory_access_is_direct(mr, false)) {
- release_lock |= prepare_mmio_access(mr);
-
- /* I/O case */
- r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
-#if defined(TARGET_WORDS_BIGENDIAN)
- if (endian == DEVICE_LITTLE_ENDIAN) {
- val = bswap64(val);
- }
-#else
- if (endian == DEVICE_BIG_ENDIAN) {
- val = bswap64(val);
- }
-#endif
- } else {
- /* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- (memory_region_get_ram_addr(mr)
- & TARGET_PAGE_MASK)
- + addr1);
- switch (endian) {
- case DEVICE_LITTLE_ENDIAN:
- val = ldq_le_p(ptr);
- break;
- case DEVICE_BIG_ENDIAN:
- val = ldq_be_p(ptr);
- break;
- default:
- val = ldq_p(ptr);
- break;
- }
- r = MEMTX_OK;
- }
- if (result) {
- *result = r;
- }
- if (release_lock) {
- qemu_mutex_unlock_iothread();
- }
- rcu_read_unlock();
- return val;
-}
-
-uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_ldq_internal(as, addr, attrs, result,
- DEVICE_NATIVE_ENDIAN);
-}
-
-uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_ldq_internal(as, addr, attrs, result,
- DEVICE_LITTLE_ENDIAN);
-}
-
-uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_ldq_internal(as, addr, attrs, result,
- DEVICE_BIG_ENDIAN);
-}
-
-uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* XXX: optimize */
-uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- uint8_t val;
- MemTxResult r;
-
- r = address_space_rw(as, addr, attrs, &val, 1, 0);
- if (result) {
- *result = r;
- }
- return val;
-}
-
-uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline uint32_t address_space_lduw_internal(AddressSpace *as,
- hwaddr addr,
- MemTxAttrs attrs,
- MemTxResult *result,
- enum device_endian endian)
-{
- uint8_t *ptr;
- uint64_t val;
- MemoryRegion *mr;
- hwaddr l = 2;
- hwaddr addr1;
- MemTxResult r;
- bool release_lock = false;
-
- rcu_read_lock();
- mr = address_space_translate(as, addr, &addr1, &l,
- false);
- if (l < 2 || !memory_access_is_direct(mr, false)) {
- release_lock |= prepare_mmio_access(mr);
-
- /* I/O case */
- r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
-#if defined(TARGET_WORDS_BIGENDIAN)
- if (endian == DEVICE_LITTLE_ENDIAN) {
- val = bswap16(val);
- }
-#else
- if (endian == DEVICE_BIG_ENDIAN) {
- val = bswap16(val);
- }
-#endif
- } else {
- /* RAM case */
- ptr = qemu_get_ram_ptr(mr->ram_block,
- (memory_region_get_ram_addr(mr)
- & TARGET_PAGE_MASK)
- + addr1);
- switch (endian) {
- case DEVICE_LITTLE_ENDIAN:
- val = lduw_le_p(ptr);
- break;
- case DEVICE_BIG_ENDIAN:
- val = lduw_be_p(ptr);
- break;
- default:
- val = lduw_p(ptr);
- break;
- }
- r = MEMTX_OK;
- }
- if (result) {
- *result = r;
- }
- if (release_lock) {
- qemu_mutex_unlock_iothread();
- }
- rcu_read_unlock();
- return val;
-}
+#define ARG1_DECL AddressSpace *as
+#define ARG1 as
+#define SUFFIX
+#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__)
+#define IS_DIRECT(mr, is_write) memory_access_is_direct(mr, is_write)
+#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs)
+#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
+#define RCU_READ_LOCK(...) rcu_read_lock()
+#define RCU_READ_UNLOCK(...) rcu_read_unlock()
+#include "memory_ldst.inc.c"
-uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
+int64_t address_space_cache_init(MemoryRegionCache *cache,
+ AddressSpace *as,
+ hwaddr addr,
+ hwaddr len,
+ bool is_write)
{
- return address_space_lduw_internal(as, addr, attrs, result,
- DEVICE_NATIVE_ENDIAN);
-}
-
-uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_lduw_internal(as, addr, attrs, result,
- DEVICE_LITTLE_ENDIAN);
-}
-
-uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
- MemTxAttrs attrs, MemTxResult *result)
-{
- return address_space_lduw_internal(as, addr, attrs, result,
- DEVICE_BIG_ENDIAN);
-}
-
-uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
-{
- return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned. The ram page is not masked as dirty
- and the code inside is not invalidated. It is useful if the dirty
- bits are used to track modified PTEs */
-void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- uint8_t *ptr;
- MemoryRegion *mr;
- hwaddr l = 4;
- hwaddr addr1;
- MemTxResult r;
- uint8_t dirty_log_mask;
- bool release_lock = false;
-
- rcu_read_lock();
- mr = address_space_translate(as, addr, &addr1, &l,
- true);
- if (l < 4 || !memory_access_is_direct(mr, true)) {
- release_lock |= prepare_mmio_access(mr);
-
- r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
- } else {
- addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
- stl_p(ptr, val);
-
- dirty_log_mask = memory_region_get_dirty_log_mask(mr);
- dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
- cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
- r = MEMTX_OK;
- }
- if (result) {
- *result = r;
- }
- if (release_lock) {
- qemu_mutex_unlock_iothread();
- }
- rcu_read_unlock();
-}
-
-void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline void address_space_stl_internal(AddressSpace *as,
- hwaddr addr, uint32_t val,
- MemTxAttrs attrs,
- MemTxResult *result,
- enum device_endian endian)
-{
- uint8_t *ptr;
- MemoryRegion *mr;
- hwaddr l = 4;
- hwaddr addr1;
- MemTxResult r;
- bool release_lock = false;
-
- rcu_read_lock();
- mr = address_space_translate(as, addr, &addr1, &l,
- true);
- if (l < 4 || !memory_access_is_direct(mr, true)) {
- release_lock |= prepare_mmio_access(mr);
-
-#if defined(TARGET_WORDS_BIGENDIAN)
- if (endian == DEVICE_LITTLE_ENDIAN) {
- val = bswap32(val);
- }
-#else
- if (endian == DEVICE_BIG_ENDIAN) {
- val = bswap32(val);
- }
-#endif
- r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
- } else {
- /* RAM case */
- addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
- switch (endian) {
- case DEVICE_LITTLE_ENDIAN:
- stl_le_p(ptr, val);
- break;
- case DEVICE_BIG_ENDIAN:
- stl_be_p(ptr, val);
- break;
- default:
- stl_p(ptr, val);
- break;
- }
- invalidate_and_set_dirty(mr, addr1, 4);
- r = MEMTX_OK;
- }
- if (result) {
- *result = r;
- }
- if (release_lock) {
- qemu_mutex_unlock_iothread();
- }
- rcu_read_unlock();
-}
-
-void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- address_space_stl_internal(as, addr, val, attrs, result,
- DEVICE_NATIVE_ENDIAN);
-}
-
-void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- address_space_stl_internal(as, addr, val, attrs, result,
- DEVICE_LITTLE_ENDIAN);
-}
-
-void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- address_space_stl_internal(as, addr, val, attrs, result,
- DEVICE_BIG_ENDIAN);
-}
-
-void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* XXX: optimize */
-void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- uint8_t v = val;
- MemTxResult r;
-
- r = address_space_rw(as, addr, attrs, &v, 1, 1);
- if (result) {
- *result = r;
- }
-}
-
-void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline void address_space_stw_internal(AddressSpace *as,
- hwaddr addr, uint32_t val,
- MemTxAttrs attrs,
- MemTxResult *result,
- enum device_endian endian)
-{
- uint8_t *ptr;
+ hwaddr l, xlat;
MemoryRegion *mr;
- hwaddr l = 2;
- hwaddr addr1;
- MemTxResult r;
- bool release_lock = false;
+ void *ptr;
- rcu_read_lock();
- mr = address_space_translate(as, addr, &addr1, &l, true);
- if (l < 2 || !memory_access_is_direct(mr, true)) {
- release_lock |= prepare_mmio_access(mr);
+ assert(len > 0);
-#if defined(TARGET_WORDS_BIGENDIAN)
- if (endian == DEVICE_LITTLE_ENDIAN) {
- val = bswap16(val);
- }
-#else
- if (endian == DEVICE_BIG_ENDIAN) {
- val = bswap16(val);
- }
-#endif
- r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
- } else {
- /* RAM case */
- addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
- ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
- switch (endian) {
- case DEVICE_LITTLE_ENDIAN:
- stw_le_p(ptr, val);
- break;
- case DEVICE_BIG_ENDIAN:
- stw_be_p(ptr, val);
- break;
- default:
- stw_p(ptr, val);
- break;
- }
- invalidate_and_set_dirty(mr, addr1, 2);
- r = MEMTX_OK;
- }
- if (result) {
- *result = r;
- }
- if (release_lock) {
- qemu_mutex_unlock_iothread();
+ l = len;
+ mr = address_space_translate(as, addr, &xlat, &l, is_write);
+ if (!memory_access_is_direct(mr, is_write)) {
+ return -EINVAL;
}
- rcu_read_unlock();
-}
-
-void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- address_space_stw_internal(as, addr, val, attrs, result,
- DEVICE_NATIVE_ENDIAN);
-}
-void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- address_space_stw_internal(as, addr, val, attrs, result,
- DEVICE_LITTLE_ENDIAN);
-}
-
-void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- address_space_stw_internal(as, addr, val, attrs, result,
- DEVICE_BIG_ENDIAN);
-}
+ l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
+ ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
-void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
+ cache->xlat = xlat;
+ cache->is_write = is_write;
+ cache->mr = mr;
+ cache->ptr = ptr;
+ cache->len = l;
+ memory_region_ref(cache->mr);
-void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
- address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+ return l;
}
-void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
+void address_space_cache_invalidate(MemoryRegionCache *cache,
+ hwaddr addr,
+ hwaddr access_len)
{
- address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+ assert(cache->is_write);
+ invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
}
-/* XXX: optimize */
-void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
- MemTxAttrs attrs, MemTxResult *result)
+void address_space_cache_destroy(MemoryRegionCache *cache)
{
- MemTxResult r;
- val = tswap64(val);
- r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
- if (result) {
- *result = r;
+ if (!cache->mr) {
+ return;
}
-}
-void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- MemTxResult r;
- val = cpu_to_le64(val);
- r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
- if (result) {
- *result = r;
- }
-}
-void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
- MemTxAttrs attrs, MemTxResult *result)
-{
- MemTxResult r;
- val = cpu_to_be64(val);
- r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
- if (result) {
- *result = r;
+ if (xen_enabled()) {
+ xen_invalidate_map_cache_entry(cache->ptr);
}
+ memory_region_unref(cache->mr);
+ cache->mr = NULL;
}
-void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
-{
- address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
-{
- address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
-{
- address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
+/* Called from RCU critical section. This function has the same
+ * semantics as address_space_translate, but it only works on a
+ * predefined range of a MemoryRegion that was mapped with
+ * address_space_cache_init.
+ */
+static inline MemoryRegion *address_space_translate_cached(
+ MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
+ hwaddr *plen, bool is_write)
+{
+ assert(addr < cache->len && *plen <= cache->len - addr);
+ *xlat = addr + cache->xlat;
+ return cache->mr;
+}
+
+#define ARG1_DECL MemoryRegionCache *cache
+#define ARG1 cache
+#define SUFFIX _cached
+#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
+#define IS_DIRECT(mr, is_write) true
+#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat))
+#define INVALIDATE(mr, ofs, len) ((void)0)
+#define RCU_READ_LOCK() ((void)0)
+#define RCU_READ_UNLOCK() ((void)0)
+#include "memory_ldst.inc.c"
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
hwaddr phys_addr;
target_ulong page;
+ cpu_synchronize_state(cpu);
while (len > 0) {
int asidx;
MemTxAttrs attrs;
rcu_read_unlock();
return ret;
}
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, non-0 on failure
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->used_length) {
+ uint8_t *host_endaddr = host_startaddr + length;
+ if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned end address: %p",
+ host_endaddr);
+ goto err;
+ }
+
+ errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+ if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+ /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+ * freeing the page.
+ */
+ ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+ } else {
+ /* Huge page case - unfortunately it can't do DONTNEED, but
+ * it can achieve the equivalent with FALLOC_FL_PUNCH_HOLE
+ * on the huge page file.
+ */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+#endif
+ }
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to discard range "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ }
+ } else {
+ error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zx/" RAM_ADDR_FMT")",
+ rb->idstr, start, length, rb->used_length);
+ }
+
+err:
+ return ret;
+}
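/* Editor's note: FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE deallocates
 * the backing blocks without changing the file size; subsequent reads
 * of the hole return zeroes and a guest touch faults the page back in,
 * mirroring what madvise(MADV_DONTNEED) gives anonymous memory.  In
 * isolation (Linux-specific, fd must be on a hole-punch capable
 * filesystem):
 *
 *     fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *               offset, length);
 */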
+
#endif