X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=numa.c;h=6fc2393ddd803726bed28af7415d8276f0c34941;hb=709a340d679d95a0c6cbb9b5f654498f04345b50;hp=e67322a69bfb1f45d4d27bb67bc6a997533ca851;hpb=bc2256c4ae86308a1521c89456b599d441119418;p=mirror_qemu.git

diff --git a/numa.c b/numa.c
index e67322a69b..6fc2393ddd 100644
--- a/numa.c
+++ b/numa.c
@@ -22,16 +22,16 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
 #include "sysemu/numa.h"
 #include "exec/cpu-common.h"
+#include "exec/ramlist.h"
 #include "qemu/bitmap.h"
 #include "qom/cpu.h"
 #include "qemu/error-report.h"
 #include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */
 #include "qapi-visit.h"
 #include "qapi/opts-visitor.h"
-#include "qapi/dealloc-visitor.h"
-#include "qapi/qmp/qerror.h"
 #include "hw/boards.h"
 #include "sysemu/hostmem.h"
 #include "qmp-commands.h"
@@ -53,6 +53,93 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one.
 int nb_numa_nodes;
 NodeInfo numa_info[MAX_NODES];
 
+void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+    struct numa_addr_range *range;
+
+    /*
+     * Memory-less nodes can come here with 0 size in which case,
+     * there is nothing to do.
+     */
+    if (!size) {
+        return;
+    }
+
+    range = g_malloc0(sizeof(*range));
+    range->mem_start = addr;
+    range->mem_end = addr + size - 1;
+    QLIST_INSERT_HEAD(&numa_info[node].addr, range, entry);
+}
+
+void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node)
+{
+    struct numa_addr_range *range, *next;
+
+    QLIST_FOREACH_SAFE(range, &numa_info[node].addr, entry, next) {
+        if (addr == range->mem_start && (addr + size - 1) == range->mem_end) {
+            QLIST_REMOVE(range, entry);
+            g_free(range);
+            return;
+        }
+    }
+}
+
+static void numa_set_mem_ranges(void)
+{
+    int i;
+    ram_addr_t mem_start = 0;
+
+    /*
+     * Deduce start address of each node and use it to store
+     * the address range info in numa_info address range list
+     */
+    for (i = 0; i < nb_numa_nodes; i++) {
+        numa_set_mem_node_id(mem_start, numa_info[i].node_mem, i);
+        mem_start += numa_info[i].node_mem;
+    }
+}
+
+/*
+ * Check if @addr falls under NUMA @node.
+ */
+static bool numa_addr_belongs_to_node(ram_addr_t addr, uint32_t node)
+{
+    struct numa_addr_range *range;
+
+    QLIST_FOREACH(range, &numa_info[node].addr, entry) {
+        if (addr >= range->mem_start && addr <= range->mem_end) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Given an address, return the index of the NUMA node to which the
+ * address belongs to.
+ */
+uint32_t numa_get_node(ram_addr_t addr, Error **errp)
+{
+    uint32_t i;
+
+    /* For non NUMA configurations, check if the addr falls under node 0 */
+    if (!nb_numa_nodes) {
+        if (numa_addr_belongs_to_node(addr, 0)) {
+            return 0;
+        }
+    }
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (numa_addr_belongs_to_node(addr, i)) {
+            return i;
+        }
+    }
+
+    error_setg(errp, "Address 0x" RAM_ADDR_FMT " doesn't belong to any "
+                "NUMA node", addr);
+    return -1;
+}
+
 static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
 {
     uint16_t nodenr;
@@ -131,20 +218,20 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
     Error *err = NULL;
 
     {
-        OptsVisitor *ov = opts_visitor_new(opts);
-        visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
-        opts_visitor_cleanup(ov);
+        Visitor *v = opts_visitor_new(opts);
+        visit_type_NumaOptions(v, NULL, &object, &err);
+        visit_free(v);
     }
 
     if (err) {
-        goto error;
+        goto end;
     }
 
-    switch (object->kind) {
-    case NUMA_OPTIONS_KIND_NODE:
-        numa_node_parse(object->node, opts, &err);
+    switch (object->type) {
+    case NUMA_OPTIONS_TYPE_NODE:
+        numa_node_parse(&object->u.node, opts, &err);
         if (err) {
-            goto error;
+            goto end;
         }
         nb_numa_nodes++;
         break;
@@ -152,19 +239,14 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp)
         abort();
     }
 
-    return 0;
-
-error:
-    error_report_err(err);
-
-    if (object) {
-        QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
-        visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
-                               &object, NULL, NULL);
-        qapi_dealloc_visitor_cleanup(dv);
+end:
+    qapi_free_NumaOptions(object);
+    if (err) {
+        error_report_err(err);
+        return -1;
     }
 
-    return -1;
+    return 0;
 }
 
 static char *enumerate_cpus(unsigned long *cpus, int max_cpus)
@@ -185,20 +267,19 @@ static char *enumerate_cpus(unsigned long *cpus, int max_cpus)
 static void validate_numa_cpus(void)
 {
     int i;
-    DECLARE_BITMAP(seen_cpus, MAX_CPUMASK_BITS);
+    unsigned long *seen_cpus = bitmap_new(max_cpus);
 
-    bitmap_zero(seen_cpus, MAX_CPUMASK_BITS);
     for (i = 0; i < nb_numa_nodes; i++) {
-        if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu,
-                              MAX_CPUMASK_BITS)) {
+        if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, max_cpus)) {
             bitmap_and(seen_cpus, seen_cpus,
-                       numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+                       numa_info[i].node_cpu, max_cpus);
             error_report("CPU(s) present in multiple NUMA nodes: %s",
-                         enumerate_cpus(seen_cpus, max_cpus));;
+                         enumerate_cpus(seen_cpus, max_cpus));
+            g_free(seen_cpus);
             exit(EXIT_FAILURE);
         }
         bitmap_or(seen_cpus, seen_cpus,
-                  numa_info[i].node_cpu, MAX_CPUMASK_BITS);
+                  numa_info[i].node_cpu, max_cpus);
     }
 
     if (!bitmap_full(seen_cpus, max_cpus)) {
@@ -210,12 +291,17 @@ static void validate_numa_cpus(void)
                      "in NUMA config");
         g_free(msg);
     }
+    g_free(seen_cpus);
 }
 
 void parse_numa_opts(MachineClass *mc)
 {
     int i;
 
+    for (i = 0; i < MAX_NODES; i++) {
+        numa_info[i].node_cpu = bitmap_new(max_cpus);
+    }
+
     if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, NULL, NULL)) {
         exit(1);
     }
@@ -252,12 +338,12 @@ void parse_numa_opts(MachineClass *mc)
 
         if (i == nb_numa_nodes) {
             uint64_t usedmem = 0;
 
-            /* On Linux, each node's border has to be 8MB aligned,
-             * the final node gets the rest.
+            /* Align each node according to the alignment
+             * requirements of the machine class
              */
             for (i = 0; i < nb_numa_nodes - 1; i++) {
                 numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
-                                        ~((1 << 23UL) - 1);
+                                        ~((1 << mc->numa_mem_align_shift) - 1);
                 usedmem += numa_info[i].node_mem;
             }
             numa_info[i].node_mem = ram_size - usedmem;
@@ -275,7 +361,13 @@ void parse_numa_opts(MachineClass *mc)
     }
 
     for (i = 0; i < nb_numa_nodes; i++) {
-        if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
+        QLIST_INIT(&numa_info[i].addr);
+    }
+
+    numa_set_mem_ranges();
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (!bitmap_empty(numa_info[i].node_cpu, max_cpus)) {
             break;
         }
     }
@@ -298,6 +390,8 @@ void parse_numa_opts(MachineClass *mc)
         }
 
         validate_numa_cpus();
+    } else {
+        numa_set_mem_node_id(0, ram_size, 0);
     }
 }
 
@@ -308,6 +402,7 @@ void numa_post_machine_init(void)
 
     CPU_FOREACH(cpu) {
         for (i = 0; i < nb_numa_nodes; i++) {
+            assert(cpu->cpu_index < max_cpus);
             if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
                 cpu->numa_node = i;
             }
@@ -324,20 +419,23 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
         Error *err = NULL;
         memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
                                          mem_path, &err);
-
-        /* Legacy behavior: if allocation failed, fall back to
-         * regular RAM allocation.
-         */
         if (err) {
             error_report_err(err);
-            memory_region_init_ram(mr, owner, name, ram_size, &error_abort);
+            if (mem_prealloc) {
+                exit(1);
+            }
+
+            /* Legacy behavior: if allocation failed, fall back to
+             * regular RAM allocation.
+             */
+            memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
         }
 #else
         fprintf(stderr, "-mem-path not supported on this host\n");
         exit(1);
 #endif
     } else {
-        memory_region_init_ram(mr, owner, name, ram_size, &error_abort);
+        memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
    }
     vmstate_register_ram_global(mr);
 }
@@ -356,17 +454,13 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
 
     memory_region_init(mr, owner, name, ram_size);
     for (i = 0; i < MAX_NODES; i++) {
-        Error *local_err = NULL;
         uint64_t size = numa_info[i].node_mem;
         HostMemoryBackend *backend = numa_info[i].node_memdev;
         if (!backend) {
             continue;
         }
-        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            exit(1);
-        }
+        MemoryRegion *seg = host_memory_backend_get_memory(backend,
+                                                           &error_fatal);
 
         if (memory_region_is_mapped(seg)) {
             char *path = object_get_canonical_path_component(OBJECT(backend));
@@ -376,6 +470,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
             exit(1);
         }
 
+        host_memory_backend_set_mapped(backend, true);
         memory_region_add_subregion(mr, addr, seg);
         vmstate_register_ram_global(seg);
         addr += size;
@@ -393,9 +488,9 @@ static void numa_stat_memory_devices(uint64_t node_mem[])
         MemoryDeviceInfo *value = info->value;
 
         if (value) {
-            switch (value->kind) {
+            switch (value->type) {
             case MEMORY_DEVICE_INFO_KIND_DIMM:
-                node_mem[value->dimm->node] += value->dimm->size;
+                node_mem[value->u.dimm.data->node] += value->u.dimm.data->size;
                 break;
             default:
                 break;
@@ -423,80 +518,86 @@ static int query_memdev(Object *obj, void *opaque)
 {
     MemdevList **list = opaque;
     MemdevList *m = NULL;
-    Error *err = NULL;
 
     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
         m = g_malloc0(sizeof(*m));
 
         m->value = g_malloc0(sizeof(*m->value));
 
-        m->value->size = object_property_get_int(obj, "size",
-                                                 &err);
-        if (err) {
-            goto error;
-        }
+        m->value->id = object_property_get_str(obj, "id",
+                                               NULL);
+        m->value->has_id = !!m->value->id;
+        m->value->size = object_property_get_int(obj, "size",
+                                                 &error_abort);
         m->value->merge = object_property_get_bool(obj, "merge",
-                                                   &err);
-        if (err) {
-            goto error;
-        }
-
+                                                   &error_abort);
         m->value->dump = object_property_get_bool(obj, "dump",
-                                                  &err);
-        if (err) {
-            goto error;
-        }
-
+                                                  &error_abort);
         m->value->prealloc = object_property_get_bool(obj,
-                                                      "prealloc", &err);
-        if (err) {
-            goto error;
-        }
-
+                                                      "prealloc",
+                                                      &error_abort);
         m->value->policy = object_property_get_enum(obj,
                                                     "policy",
-                                                    HostMemPolicy_lookup,
-                                                    &err);
-        if (err) {
-            goto error;
-        }
-
+                                                    "HostMemPolicy",
+                                                    &error_abort);
         object_property_get_uint16List(obj, "host-nodes",
-                                       &m->value->host_nodes, &err);
-        if (err) {
-            goto error;
-        }
+                                       &m->value->host_nodes,
+                                       &error_abort);
 
         m->next = *list;
         *list = m;
     }
 
     return 0;
-
-error:
-    g_free(m->value);
-    g_free(m);
-
-    return -1;
 }
 
 MemdevList *qmp_query_memdev(Error **errp)
 {
-    Object *obj;
+    Object *obj = object_get_objects_root();
     MemdevList *list = NULL;
 
-    obj = object_get_objects_root();
-    if (obj == NULL) {
-        return NULL;
+    object_child_foreach(obj, query_memdev, &list);
+    return list;
+}
+
+int numa_get_node_for_cpu(int idx)
+{
+    int i;
+
+    assert(idx < max_cpus);
+
+    for (i = 0; i < nb_numa_nodes; i++) {
+        if (test_bit(idx, numa_info[i].node_cpu)) {
+            break;
+        }
     }
+    return i;
+}
+
+void ram_block_notifier_add(RAMBlockNotifier *n)
+{
+    QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);
+}
+
+void ram_block_notifier_remove(RAMBlockNotifier *n)
+{
+    QLIST_REMOVE(n, next);
+}
 
-    if (object_child_foreach(obj, query_memdev, &list) != 0) {
-        goto error;
+void ram_block_notify_add(void *host, size_t size)
+{
+    RAMBlockNotifier *notifier;
+
+    QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+        notifier->ram_block_added(notifier, host, size);
     }
+}
 
-    return list;
+void ram_block_notify_remove(void *host, size_t size)
+{
+    RAMBlockNotifier *notifier;
 
-error:
-    qapi_free_MemdevList(list);
-    return NULL;
+    QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+        notifier->ram_block_removed(notifier, host, size);
+    }
 }
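
Usage sketch for the RAMBlockNotifier hooks added at the end of this diff:
ram_block_notifier_add()/ram_block_notifier_remove() register and unregister
an observer, and ram_block_notify_add()/ram_block_notify_remove() walk the
registered observers whenever a RAM block appears or disappears. The consumer
below is a sketch, not code from this commit; it assumes RAMBlockNotifier is
the struct declared in "exec/ramlist.h" with ram_block_added/ram_block_removed
callbacks and a QLIST_ENTRY(RAMBlockNotifier) next link (the shape the QLIST
calls above imply), and the log_* names are hypothetical.

#include "qemu/osdep.h"
#include "exec/ramlist.h"

/* Hypothetical observer that logs every RAM block as it comes and goes. */
static void log_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
{
    fprintf(stderr, "ram block added: host=%p size=%zu\n", host, size);
}

static void log_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size)
{
    fprintf(stderr, "ram block removed: host=%p size=%zu\n", host, size);
}

static RAMBlockNotifier log_notifier = {
    .ram_block_added = log_ram_block_added,
    .ram_block_removed = log_ram_block_removed,
};

static void log_notifier_init(void)
{
    /* Register before the RAM blocks of interest are created; every
     * registered notifier is invoked by ram_block_notify_add/remove. */
    ram_block_notifier_add(&log_notifier);
}

A matching ram_block_notifier_remove(&log_notifier) call unhooks the observer
again; both callbacks receive the host pointer and byte size of the RAM block
being added or removed.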