X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=exec.c;h=67e520d18ea5b65932f899b6c5981c74cfc15992;hb=644acf99b8cb8437f65600cf00c2e090bf3e3bc2;hp=53a15b7ad7c756996b0a5c5774811eb593b1b044;hpb=9bf825bf3df4ebae3af51566c8088e3f1249a910;p=mirror_qemu.git diff --git a/exec.c b/exec.c index 53a15b7ad7..67e520d18e 100644 --- a/exec.c +++ b/exec.c @@ -25,7 +25,7 @@ #include "cpu.h" #include "exec/exec-all.h" #include "exec/target_page.h" -#include "tcg.h" +#include "tcg/tcg.h" #include "hw/qdev-core.h" #include "hw/qdev-properties.h" #if !defined(CONFIG_USER_ONLY) @@ -65,6 +65,8 @@ #include "exec/ram_addr.h" #include "exec/log.h" +#include "qemu/pmem.h" + #include "migration/vmstate.h" #include "qemu/range.h" @@ -88,15 +90,9 @@ static MemoryRegion *system_io; AddressSpace address_space_io; AddressSpace address_space_memory; -MemoryRegion io_mem_rom, io_mem_notdirty; static MemoryRegion io_mem_unassigned; #endif -#ifdef TARGET_PAGE_BITS_VARY -int target_page_bits; -bool target_page_bits_decided; -#endif - CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); /* current CPU in the current thread. It is only valid inside @@ -110,37 +106,8 @@ int use_icount; uintptr_t qemu_host_page_size; intptr_t qemu_host_page_mask; -bool set_preferred_target_page_bits(int bits) -{ - /* The target page size is the lowest common denominator for all - * the CPUs in the system, so we can only make it smaller, never - * larger. And we can't make it smaller once we've committed to - * a particular size. - */ -#ifdef TARGET_PAGE_BITS_VARY - assert(bits >= TARGET_PAGE_BITS_MIN); - if (target_page_bits == 0 || target_page_bits > bits) { - if (target_page_bits_decided) { - return false; - } - target_page_bits = bits; - } -#endif - return true; -} - #if !defined(CONFIG_USER_ONLY) -static void finalize_target_page_bits(void) -{ -#ifdef TARGET_PAGE_BITS_VARY - if (target_page_bits == 0) { - target_page_bits = TARGET_PAGE_BITS_MIN; - } - target_page_bits_decided = true; -#endif -} - typedef struct PhysPageEntry PhysPageEntry; struct PhysPageEntry { @@ -191,17 +158,12 @@ typedef struct subpage_t { } subpage_t; #define PHYS_SECTION_UNASSIGNED 0 -#define PHYS_SECTION_NOTDIRTY 1 -#define PHYS_SECTION_ROM 2 -#define PHYS_SECTION_WATCH 3 static void io_mem_init(void); static void memory_map_init(void); static void tcg_log_global_after_sync(MemoryListener *listener); static void tcg_commit(MemoryListener *listener); -static MemoryRegion io_mem_watch; - /** * CPUAddressSpace: all the information a CPU needs about an AddressSpace * @cpu: the CPU whose AddressSpace this is @@ -230,8 +192,7 @@ static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) { static unsigned alloc_hint = 16; if (map->nodes_nb + nodes > map->nodes_nb_alloc) { - map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint); - map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes); + map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes); map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); alloc_hint = map->nodes_nb_alloc; } @@ -258,7 +219,7 @@ static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf) } static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, - hwaddr *index, hwaddr *nb, uint16_t leaf, + hwaddr *index, uint64_t *nb, uint16_t leaf, int level) { PhysPageEntry *p; @@ -284,7 +245,7 @@ static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, } static void phys_page_set(AddressSpaceDispatch *d, - hwaddr index, hwaddr nb, + hwaddr index, uint64_t nb, uint16_t leaf) { /* Wildly overreserve - it doesn't matter much. */ @@ -328,7 +289,8 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes) assert(valid_ptr < P_L2_SIZE); /* Don't compress if it won't fit in the # of bits we have. */ - if (lp->skip + p[valid_ptr].skip >= (1 << 3)) { + if (P_L2_LEVELS >= (1 << 6) && + lp->skip + p[valid_ptr].skip >= (1 << 6)) { return; } @@ -666,7 +628,8 @@ static void tcg_register_iommu_notifier(CPUState *cpu, */ MemoryRegion *mr = MEMORY_REGION(iommu_mr); TCGIOMMUNotifier *notifier; - int i; + Error *err = NULL; + int i, ret; for (i = 0; i < cpu->iommu_notifiers->len; i++) { notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); @@ -695,7 +658,12 @@ static void tcg_register_iommu_notifier(CPUState *cpu, 0, HWADDR_MAX, iommu_idx); - memory_region_register_iommu_notifier(notifier->mr, ¬ifier->n); + ret = memory_region_register_iommu_notifier(notifier->mr, ¬ifier->n, + &err); + if (ret) { + error_report_err(err); + exit(1); + } } if (!notifier->active) { @@ -975,6 +943,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) } tlb_init(cpu); + qemu_plugin_vcpu_init_hook(cpu); + #ifndef CONFIG_USER_ONLY if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu); @@ -1018,7 +988,7 @@ const char *parse_cpu_option(const char *cpu_option) void tb_invalidate_phys_addr(target_ulong addr) { mmap_lock(); - tb_invalidate_phys_page_range(addr, addr + 1, 0); + tb_invalidate_phys_page_range(addr, addr + 1); mmap_unlock(); } @@ -1037,16 +1007,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) return; } - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); mr = address_space_translate(as, addr, &addr, &l, false, attrs); if (!(memory_region_is_ram(mr) || memory_region_is_romd(mr))) { - rcu_read_unlock(); return; } ram_addr = memory_region_get_ram_addr(mr) + addr; - tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); - rcu_read_unlock(); + tb_invalidate_phys_page_range(ram_addr, ram_addr + 1); } static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) @@ -1062,28 +1030,7 @@ static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) } #endif -#if defined(CONFIG_USER_ONLY) -void cpu_watchpoint_remove_all(CPUState *cpu, int mask) - -{ -} - -int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len, - int flags) -{ - return -ENOSYS; -} - -void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint) -{ -} - -int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, - int flags, CPUWatchpoint **watchpoint) -{ - return -ENOSYS; -} -#else +#ifndef CONFIG_USER_ONLY /* Add a watchpoint. */ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, int flags, CPUWatchpoint **watchpoint) @@ -1159,9 +1106,8 @@ void cpu_watchpoint_remove_all(CPUState *cpu, int mask) * partially or completely with the address range covered by the * access). */ -static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp, - vaddr addr, - vaddr len) +static inline bool watchpoint_address_matches(CPUWatchpoint *wp, + vaddr addr, vaddr len) { /* We know the lengths are non-zero, but a little caution is * required to avoid errors in the case where the range ends @@ -1174,7 +1120,20 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp, return !(addr > wpend || wp->vaddr > addrend); } -#endif +/* Return flags for watchpoints that match addr + prot. */ +int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len) +{ + CPUWatchpoint *wp; + int ret = 0; + + QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { + if (watchpoint_address_matches(wp, addr, TARGET_PAGE_SIZE)) { + ret |= wp->flags; + } + } + return ret; +} +#endif /* !CONFIG_USER_ONLY */ /* Add a breakpoint. */ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, @@ -1266,13 +1225,13 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) fprintf(stderr, "\n"); cpu_dump_state(cpu, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP); if (qemu_log_separate()) { - qemu_log_lock(); + FILE *logfile = qemu_log_lock(); qemu_log("qemu: fatal: "); qemu_log_vprintf(fmt, ap2); qemu_log("\n"); log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP); qemu_log_flush(); - qemu_log_unlock(); + qemu_log_unlock(logfile); qemu_log_close(); } va_end(ap2); @@ -1341,14 +1300,13 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); block = qemu_get_ram_block(start); assert(block == qemu_get_ram_block(end - 1)); start1 = (uintptr_t)ramblock_ptr(block, start - block->offset); CPU_FOREACH(cpu) { tlb_reset_dirty(cpu, start1, length); } - rcu_read_unlock(); } /* Note: start and end must be within the same ram block. */ @@ -1369,30 +1327,29 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; - rcu_read_lock(); + WITH_RCU_READ_LOCK_GUARD() { + blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + ramblock = qemu_get_ram_block(start); + /* Range sanity check on the ramblock */ + assert(start >= ramblock->offset && + start + length <= ramblock->offset + ramblock->used_length); - blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); - ramblock = qemu_get_ram_block(start); - /* Range sanity check on the ramblock */ - assert(start >= ramblock->offset && - start + length <= ramblock->offset + ramblock->used_length); + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, + DIRTY_MEMORY_BLOCK_SIZE - offset); - while (page < end) { - unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; - unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset); + dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx], + offset, num); + page += num; + } - dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx], - offset, num); - page += num; + mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset; + mr_size = (end - page) << TARGET_PAGE_BITS; + memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); } - mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset; - mr_size = (end - page) << TARGET_PAGE_BITS; - memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); - - rcu_read_unlock(); - if (dirty && tcg_enabled()) { tlb_reset_dirty_range_all(start, length); } @@ -1420,28 +1377,27 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty end = last >> TARGET_PAGE_BITS; dest = 0; - rcu_read_lock(); - - blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + WITH_RCU_READ_LOCK_GUARD() { + blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); - while (page < end) { - unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; - unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; - unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset); + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, + DIRTY_MEMORY_BLOCK_SIZE - offset); - assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL))); - assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); - offset >>= BITS_PER_LEVEL; + assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL))); + assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); + offset >>= BITS_PER_LEVEL; - bitmap_copy_and_clear_atomic(snap->dirty + dest, - blocks->blocks[idx] + offset, - num); - page += num; - dest += num >> BITS_PER_LEVEL; + bitmap_copy_and_clear_atomic(snap->dirty + dest, + blocks->blocks[idx] + offset, + num); + page += num; + dest += num >> BITS_PER_LEVEL; + } } - rcu_read_unlock(); - if (tcg_enabled()) { tlb_reset_dirty_range_all(start, length); } @@ -1474,52 +1430,17 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, - MemoryRegionSection *section, - target_ulong vaddr, - hwaddr paddr, hwaddr xlat, - int prot, - target_ulong *address) + MemoryRegionSection *section) { - hwaddr iotlb; - CPUWatchpoint *wp; - - if (memory_region_is_ram(section->mr)) { - /* Normal RAM. */ - iotlb = memory_region_get_ram_addr(section->mr) + xlat; - if (!section->readonly) { - iotlb |= PHYS_SECTION_NOTDIRTY; - } else { - iotlb |= PHYS_SECTION_ROM; - } - } else { - AddressSpaceDispatch *d; - - d = flatview_to_dispatch(section->fv); - iotlb = section - d->map.sections; - iotlb += xlat; - } - - /* Make accesses to pages with watchpoints go via the - watchpoint trap routines. */ - QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) { - /* Avoid trapping reads of pages with a write breakpoint. */ - if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) { - iotlb = PHYS_SECTION_WATCH + paddr; - *address |= TLB_MMIO; - break; - } - } - } - - return iotlb; + AddressSpaceDispatch *d = flatview_to_dispatch(section->fv); + return section - d->map.sections; } #endif /* defined(CONFIG_USER_ONLY) */ #if !defined(CONFIG_USER_ONLY) -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section); +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section); static subpage_t *subpage_init(FlatView *fv, hwaddr base); static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) = @@ -1687,7 +1608,7 @@ void ram_block_dump(Monitor *mon) RAMBlock *block; char *psize; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); monitor_printf(mon, "%24s %8s %18s %18s %18s\n", "Block Name", "PSize", "Offset", "Used", "Total"); RAMBLOCK_FOREACH(block) { @@ -1699,7 +1620,6 @@ void ram_block_dump(Monitor *mon) (uint64_t)block->max_length); g_free(psize); } - rcu_read_unlock(); } #ifdef __linux__ @@ -1750,6 +1670,7 @@ long qemu_minrampagesize(void) long hpsize = LONG_MAX; long mainrampagesize; Object *memdev_root; + MachineState *ms = MACHINE(qdev_get_machine()); mainrampagesize = qemu_mempath_getpagesize(mem_path); @@ -1777,7 +1698,9 @@ long qemu_minrampagesize(void) * so if its page size is smaller we have got to report that size instead. */ if (hpsize > mainrampagesize && - (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { + (ms->numa_state == NULL || + ms->numa_state->num_nodes == 0 || + ms->numa_state->nodes[0].node_memdev == NULL)) { static bool warned; if (!warned) { error_report("Huge page support disabled (n/a for main memory)."); @@ -1803,18 +1726,50 @@ long qemu_maxrampagesize(void) #else long qemu_minrampagesize(void) { - return getpagesize(); + return qemu_real_host_page_size; } long qemu_maxrampagesize(void) { - return getpagesize(); + return qemu_real_host_page_size; } #endif #ifdef CONFIG_POSIX static int64_t get_file_size(int fd) { - int64_t size = lseek(fd, 0, SEEK_END); + int64_t size; +#if defined(__linux__) + struct stat st; + + if (fstat(fd, &st) < 0) { + return -errno; + } + + /* Special handling for devdax character devices */ + if (S_ISCHR(st.st_mode)) { + g_autofree char *subsystem_path = NULL; + g_autofree char *subsystem = NULL; + + subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", + major(st.st_rdev), minor(st.st_rdev)); + subsystem = g_file_read_link(subsystem_path, NULL); + + if (subsystem && g_str_has_suffix(subsystem, "/dax")) { + g_autofree char *size_path = NULL; + g_autofree char *size_str = NULL; + + size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", + major(st.st_rdev), minor(st.st_rdev)); + + if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { + return g_ascii_strtoll(size_str, NULL, 0); + } + } + } +#endif /* defined(__linux__) */ + + /* st.st_size may be zero for special files yet lseek(2) works */ + size = lseek(fd, 0, SEEK_END); if (size < 0) { return -errno; } @@ -1888,6 +1843,7 @@ static void *file_ram_alloc(RAMBlock *block, bool truncate, Error **errp) { + Error *err = NULL; MachineState *ms = MACHINE(qdev_get_machine()); void *area; @@ -1945,8 +1901,9 @@ static void *file_ram_alloc(RAMBlock *block, } if (mem_prealloc) { - os_mem_prealloc(fd, area, memory, ms->smp.cpus, errp); - if (errp && *errp) { + os_mem_prealloc(fd, area, memory, ms->smp.cpus, &err); + if (err) { + error_propagate(errp, err); qemu_ram_munmap(fd, area, memory); return NULL; } @@ -2018,11 +1975,10 @@ static unsigned long last_ram_page(void) RAMBlock *block; ram_addr_t last = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBLOCK_FOREACH(block) { last = MAX(last, block->offset + block->max_length); } - rcu_read_unlock(); return last >> TARGET_PAGE_BITS; } @@ -2109,7 +2065,7 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) } pstrcat(new_block->idstr, sizeof(new_block->idstr), name); - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBLOCK_FOREACH(block) { if (block != new_block && !strcmp(block->idstr, new_block->idstr)) { @@ -2118,7 +2074,6 @@ void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) abort(); } } - rcu_read_unlock(); } /* Called with iothread lock held. */ @@ -2205,6 +2160,40 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) return 0; } +/* + * Trigger sync on the given ram block for range [start, start + length] + * with the backing store if one is available. + * Otherwise no-op. + * @Note: this is supposed to be a synchronous op. + */ +void qemu_ram_writeback(RAMBlock *block, ram_addr_t start, ram_addr_t length) +{ + void *addr = ramblock_ptr(block, start); + + /* The requested range should fit in within the block range */ + g_assert((start + length) <= block->used_length); + +#ifdef CONFIG_LIBPMEM + /* The lack of support for pmem should not block the sync */ + if (ramblock_is_pmem(block)) { + pmem_persist(addr, length); + return; + } +#endif + if (block->fd >= 0) { + /** + * Case there is no support for PMEM or the memory has not been + * specified as persistent (or is not one) - use the msync. + * Less optimal but still achieves the same goal + */ + if (qemu_msync(addr, length, block->fd)) { + warn_report("%s: failed to sync memory range: start: " + RAM_ADDR_FMT " length: " RAM_ADDR_FMT, + __func__, start, length); + } + } +} + /* Called with ram_list.mutex held */ static void dirty_memory_extend(ram_addr_t old_ram_size, ram_addr_t new_ram_size) @@ -2434,7 +2423,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, new_block->max_length = max_size; assert(max_size >= size); new_block->fd = -1; - new_block->page_size = getpagesize(); + new_block->page_size = qemu_real_host_page_size; new_block->host = host; if (host) { new_block->flags |= RAM_PREALLOC; @@ -2660,17 +2649,16 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, if (xen_enabled()) { ram_addr_t ram_addr; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); ram_addr = xen_ram_addr_from_mapcache(ptr); block = qemu_get_ram_block(ram_addr); if (block) { *offset = ram_addr - block->offset; } - rcu_read_unlock(); return block; } - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); block = atomic_rcu_read(&ram_list.mru_block); if (block && block->host && host - block->host < block->max_length) { goto found; @@ -2686,7 +2674,6 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, } } - rcu_read_unlock(); return NULL; found: @@ -2694,7 +2681,6 @@ found: if (round_offset) { *offset &= TARGET_PAGE_MASK; } - rcu_read_unlock(); return block; } @@ -2733,110 +2719,36 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) return block->offset + offset; } -/* Called within RCU critical section. */ -void memory_notdirty_write_prepare(NotDirtyInfo *ndi, - CPUState *cpu, - vaddr mem_vaddr, - ram_addr_t ram_addr, - unsigned size) -{ - ndi->cpu = cpu; - ndi->ram_addr = ram_addr; - ndi->mem_vaddr = mem_vaddr; - ndi->size = size; - ndi->pages = NULL; - - assert(tcg_enabled()); - if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->pages = page_collection_lock(ram_addr, ram_addr + size); - tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); - } -} - -/* Called within RCU critical section. */ -void memory_notdirty_write_complete(NotDirtyInfo *ndi) -{ - if (ndi->pages) { - assert(tcg_enabled()); - page_collection_unlock(ndi->pages); - ndi->pages = NULL; - } - - /* Set both VGA and migration bits for simplicity and to remove - * the notdirty callback faster. - */ - cpu_physical_memory_set_dirty_range(ndi->ram_addr, ndi->size, - DIRTY_CLIENTS_NOCODE); - /* we remove the notdirty callback only if the code has been - flushed */ - if (!cpu_physical_memory_is_clean(ndi->ram_addr)) { - tlb_set_dirty(ndi->cpu, ndi->mem_vaddr); - } -} - -/* Called within RCU critical section. */ -static void notdirty_mem_write(void *opaque, hwaddr ram_addr, - uint64_t val, unsigned size) -{ - NotDirtyInfo ndi; - - memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr, - ram_addr, size); - - stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val); - memory_notdirty_write_complete(&ndi); -} - -static bool notdirty_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -static const MemoryRegionOps notdirty_mem_ops = { - .write = notdirty_mem_write, - .valid.accepts = notdirty_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - /* Generate a debug exception if a watchpoint has been hit. */ -static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) +void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, + MemTxAttrs attrs, int flags, uintptr_t ra) { - CPUState *cpu = current_cpu; CPUClass *cc = CPU_GET_CLASS(cpu); - target_ulong vaddr; CPUWatchpoint *wp; assert(tcg_enabled()); if (cpu->watchpoint_hit) { - /* We re-entered the check after replacing the TB. Now raise - * the debug interrupt so that is will trigger after the - * current instruction. */ + /* + * We re-entered the check after replacing the TB. + * Now raise the debug interrupt so that it will + * trigger after the current instruction. + */ + qemu_mutex_lock_iothread(); cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG); + qemu_mutex_unlock_iothread(); return; } - vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset; - vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len); + + addr = cc->adjust_watchpoint_address(cpu, addr, len); QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - if (cpu_watchpoint_address_matches(wp, vaddr, len) + if (watchpoint_address_matches(wp, addr, len) && (wp->flags & flags)) { if (flags == BP_MEM_READ) { wp->flags |= BP_WATCHPOINT_HIT_READ; } else { wp->flags |= BP_WATCHPOINT_HIT_WRITE; } - wp->hitaddr = vaddr; + wp->hitaddr = MAX(addr, wp->vaddr); wp->hitattrs = attrs; if (!cpu->watchpoint_hit) { if (wp->flags & BP_CPU && @@ -2847,15 +2759,18 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) cpu->watchpoint_hit = wp; mmap_lock(); - tb_check_watchpoint(cpu); + tb_check_watchpoint(cpu, ra); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; mmap_unlock(); - cpu_loop_exit(cpu); + cpu_loop_exit_restore(cpu, ra); } else { /* Force execution of one insn next time. */ cpu->cflags_next_tb = 1 | curr_cflags(); mmap_unlock(); + if (ra) { + cpu_restore_state(cpu, ra, true); + } cpu_loop_exit_noexc(cpu); } } @@ -2865,80 +2780,6 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) } } -/* Watchpoint access routines. Watchpoints are inserted using TLB tricks, - so these check for a hit then pass through to the normal out-of-line - phys routines. */ -static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata, - unsigned size, MemTxAttrs attrs) -{ - MemTxResult res; - uint64_t data; - int asidx = cpu_asidx_from_attrs(current_cpu, attrs); - AddressSpace *as = current_cpu->cpu_ases[asidx].as; - - check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ); - switch (size) { - case 1: - data = address_space_ldub(as, addr, attrs, &res); - break; - case 2: - data = address_space_lduw(as, addr, attrs, &res); - break; - case 4: - data = address_space_ldl(as, addr, attrs, &res); - break; - case 8: - data = address_space_ldq(as, addr, attrs, &res); - break; - default: abort(); - } - *pdata = data; - return res; -} - -static MemTxResult watch_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size, - MemTxAttrs attrs) -{ - MemTxResult res; - int asidx = cpu_asidx_from_attrs(current_cpu, attrs); - AddressSpace *as = current_cpu->cpu_ases[asidx].as; - - check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE); - switch (size) { - case 1: - address_space_stb(as, addr, val, attrs, &res); - break; - case 2: - address_space_stw(as, addr, val, attrs, &res); - break; - case 4: - address_space_stl(as, addr, val, attrs, &res); - break; - case 8: - address_space_stq(as, addr, val, attrs, &res); - break; - default: abort(); - } - return res; -} - -static const MemoryRegionOps watch_mem_ops = { - .read_with_attrs = watch_mem_read, - .write_with_attrs = watch_mem_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - static MemTxResult flatview_read(FlatView *fv, hwaddr addr, MemTxAttrs attrs, uint8_t *buf, hwaddr len); static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, @@ -3005,8 +2846,8 @@ static const MemoryRegionOps subpage_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section) +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section) { int idx, eidx; @@ -3029,6 +2870,7 @@ static subpage_t *subpage_init(FlatView *fv, hwaddr base) { subpage_t *mmio; + /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */ mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t)); mmio->fv = fv; mmio->base = base; @@ -3039,7 +2881,6 @@ static subpage_t *subpage_init(FlatView *fv, hwaddr base) printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__, mmio, base, TARGET_PAGE_SIZE); #endif - subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED); return mmio; } @@ -3058,38 +2899,6 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) return phys_section_add(map, §ion); } -static void readonly_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - /* Ignore any write to ROM. */ -} - -static bool readonly_mem_accepts(void *opaque, hwaddr addr, - unsigned size, bool is_write, - MemTxAttrs attrs) -{ - return is_write; -} - -/* This will only be used for writes, because reads are special cased - * to directly access the underlying host ram. - */ -static const MemoryRegionOps readonly_mem_ops = { - .write = readonly_mem_write, - .valid.accepts = readonly_mem_accepts, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - .unaligned = false, - }, -}; - MemoryRegionSection *iotlb_to_section(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { @@ -3103,20 +2912,8 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu, static void io_mem_init(void) { - memory_region_init_io(&io_mem_rom, NULL, &readonly_mem_ops, - NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); - - /* io_mem_notdirty calls tb_invalidate_phys_page_fast, - * which can be called without the iothread mutex. - */ - memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, - NULL, UINT64_MAX); - memory_region_clear_global_locking(&io_mem_notdirty); - - memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, - NULL, UINT64_MAX); } AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) @@ -3126,12 +2923,6 @@ AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv) n = dummy_section(&d->map, fv, &io_mem_unassigned); assert(n == PHYS_SECTION_UNASSIGNED); - n = dummy_section(&d->map, fv, &io_mem_notdirty); - assert(n == PHYS_SECTION_NOTDIRTY); - n = dummy_section(&d->map, fv, &io_mem_rom); - assert(n == PHYS_SECTION_ROM); - n = dummy_section(&d->map, fv, &io_mem_watch); - assert(n == PHYS_SECTION_WATCH); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 }; @@ -3169,8 +2960,17 @@ static void tcg_log_global_after_sync(MemoryListener *listener) * by pushing the migration thread's memory read after the vCPU thread has * written the memory. */ - cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); - run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL); + if (replay_mode == REPLAY_MODE_NONE) { + /* + * VGA can make calls to this function while updating the screen. + * In record/replay mode this causes a deadlock, because + * run_on_cpu waits for rr mutex. Therefore no races are possible + * in this case and no need for making run_on_cpu when + * record/replay is not enabled. + */ + cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); + run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL); + } } static void tcg_commit(MemoryListener *listener) @@ -3476,10 +3276,9 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, FlatView *fv; if (len > 0) { - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); result = flatview_read(fv, addr, attrs, buf, len); - rcu_read_unlock(); } return result; @@ -3493,10 +3292,9 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr, FlatView *fv; if (len > 0) { - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); result = flatview_write(fv, addr, attrs, buf, len); - rcu_read_unlock(); } return result; @@ -3536,7 +3334,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, hwaddr addr1; MemoryRegion *mr; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); while (len > 0) { l = len; mr = address_space_translate(as, addr, &addr1, &l, true, attrs); @@ -3561,7 +3359,6 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, buf += l; addr += l; } - rcu_read_unlock(); return MEMTX_OK; } @@ -3706,10 +3503,9 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, FlatView *fv; bool result; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); result = flatview_access_valid(fv, addr, len, is_write, attrs); - rcu_read_unlock(); return result; } @@ -3764,13 +3560,12 @@ void *address_space_map(AddressSpace *as, } l = len; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); fv = address_space_to_flatview(as); mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs); if (!memory_access_is_direct(mr, is_write)) { if (atomic_xchg(&bounce.in_use, true)) { - rcu_read_unlock(); return NULL; } /* Avoid unbounded allocations */ @@ -3786,7 +3581,6 @@ void *address_space_map(AddressSpace *as, bounce.buffer, l); } - rcu_read_unlock(); *plen = l; return bounce.buffer; } @@ -3796,7 +3590,6 @@ void *address_space_map(AddressSpace *as, *plen = flatview_extend_translation(fv, addr, len, mr, xlat, l, is_write, attrs); ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen, true); - rcu_read_unlock(); return ptr; } @@ -4064,13 +3857,12 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr) hwaddr l = 1; bool res; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); mr = address_space_translate(&address_space_memory, phys_addr, &phys_addr, &l, false, MEMTXATTRS_UNSPECIFIED); res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr)); - rcu_read_unlock(); return res; } @@ -4079,14 +3871,13 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) RAMBlock *block; int ret = 0; - rcu_read_lock(); + RCU_READ_LOCK_GUARD(); RAMBLOCK_FOREACH(block) { ret = func(block, opaque); if (ret) { break; } } - rcu_read_unlock(); return ret; } @@ -4104,7 +3895,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) uint8_t *host_startaddr = rb->host + start; - if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { + if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) { error_report("ram_block_discard_range: Unaligned start address: %p", host_startaddr); goto err; @@ -4112,10 +3903,9 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) if ((start + length) <= rb->used_length) { bool need_madvise, need_fallocate; - uint8_t *host_endaddr = host_startaddr + length; - if ((uintptr_t)host_endaddr & (rb->page_size - 1)) { - error_report("ram_block_discard_range: Unaligned end address: %p", - host_endaddr); + if (!QEMU_IS_ALIGNED(length, rb->page_size)) { + error_report("ram_block_discard_range: Unaligned length: %zx", + length); goto err; }