#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
-#include "sysemu/sysemu.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
+#include "qemu/madvise.h"
#ifdef CONFIG_NUMA
#include <numaif.h>
+#include <numa.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
+/*
+ * HOST_MEM_POLICY_PREFERRED may either translate to MPOL_PREFERRED or
+ * MPOL_PREFERRED_MANY, see comments further below.
+ */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
host_memory_backend_get_name(HostMemoryBackend *backend)
{
if (!backend->use_canonical_path) {
- return object_get_canonical_path_component(OBJECT(backend));
+ return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
}
return object_get_canonical_path(OBJECT(backend));
void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- Error *local_err = NULL;
uint64_t value;
if (host_memory_backend_mr_inited(backend)) {
- error_setg(&local_err, "cannot change property %s of %s ",
- name, object_get_typename(obj));
- goto out;
+ error_setg(errp, "cannot change property %s of %s ", name,
+ object_get_typename(obj));
+ return;
}
- visit_type_size(v, name, &value, &local_err);
- if (local_err) {
- goto out;
+ if (!visit_type_size(v, name, &value, errp)) {
+ return;
}
if (!value) {
- error_setg(&local_err,
+ error_setg(errp,
"property '%s' of %s doesn't take value '%" PRIu64 "'",
name, object_get_typename(obj), value);
- goto out;
+ return;
}
backend->size = value;
-out:
- error_propagate(errp, local_err);
}
static void
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
uint16List *host_nodes = NULL;
- uint16List **node = &host_nodes;
+ uint16List **tail = &host_nodes;
unsigned long value;
value = find_first_bit(backend->host_nodes, MAX_NODES);
goto ret;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ QAPI_LIST_APPEND(tail, value);
do {
value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
break;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ QAPI_LIST_APPEND(tail, value);
} while (true);
ret:
visit_type_uint16List(v, name, &host_nodes, errp);
+ qapi_free_uint16List(host_nodes);
}
static void
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- return backend->prealloc || backend->force_prealloc;
+ return backend->prealloc;
}
static void host_memory_backend_set_prealloc(Object *obj, bool value,
{
Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- MachineState *ms = MACHINE(qdev_get_machine());
- if (backend->force_prealloc) {
- if (value) {
- error_setg(errp,
- "remove -mem-prealloc to use the prealloc property");
- return;
- }
+ if (!backend->reserve && value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
}
if (!host_memory_backend_mr_inited(backend)) {
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
- os_mem_prealloc(fd, ptr, sz, ms->smp.cpus, &local_err);
+ qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
+ backend->prealloc_context, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
}
+/*
+ * Property getter for "prealloc-threads": passes the backend's
+ * prealloc_threads field to visitor @v as a uint32. Any visitor failure is
+ * reported through @errp.
+ */
+static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
+    const char *name, void *opaque, Error **errp)
+{
+    HostMemoryBackend *memdev = MEMORY_BACKEND(obj);
+    uint32_t *threads = &memdev->prealloc_threads;
+
+    visit_type_uint32(v, name, threads, errp);
+}
+
+/*
+ * Property setter for "prealloc-threads".
+ *
+ * Reads a uint32 from visitor @v and stores it in the backend's
+ * prealloc_threads field. A value of zero is rejected with an error set in
+ * @errp. The backend is left untouched when the visit fails or the value is
+ * rejected.
+ */
+static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
+    const char *name, void *opaque, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    uint32_t value;
+
+    if (!visit_type_uint32(v, name, &value, errp)) {
+        return;
+    }
+    /* value is unsigned, so zero is the only input that can be rejected */
+    if (!value) {
+        error_setg(errp,
+                   "property '%s' of %s doesn't take value '%" PRIu32 "'",
+                   name, object_get_typename(obj), value);
+        return;
+    }
+    backend->prealloc_threads = value;
+}
+
+/*
+ * Sets default values on the backend object: merge and dump are taken from
+ * machine_mem_merge() and machine_dump_guest_core() of the current machine,
+ * reserve defaults to true, and prealloc_threads defaults to the machine's
+ * smp.cpus count.
+ */
static void host_memory_backend_init(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
MachineState *machine = MACHINE(qdev_get_machine());
+ /* TODO: convert access to globals to compat properties */
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
- backend->prealloc = mem_prealloc;
+ backend->reserve = true;
+ backend->prealloc_threads = machine->smp.cpus;
}
static void host_memory_backend_post_init(Object *obj)
return backend->is_mapped;
}
-#ifdef __linux__
+/*
+ * Returns the page size that qemu_ram_pagesize() reports for the RAM block
+ * of @memdev's memory region. Asserts that this is at least
+ * qemu_real_host_page_size().
+ */
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
- Object *obj = OBJECT(memdev);
- char *path = object_property_get_str(obj, "mem-path", NULL);
- size_t pagesize = qemu_mempath_getpagesize(path);
-
- g_free(path);
+ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
+ g_assert(pagesize >= qemu_real_host_page_size());
return pagesize;
}
-#else
-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
-{
- return qemu_real_host_page_size;
-}
-#endif
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(uc);
HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
- MachineState *ms = MACHINE(qdev_get_machine());
Error *local_err = NULL;
void *ptr;
uint64_t sz;
* before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
* this doesn't catch hugepage case. */
unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
+ int mode = backend->policy;
/* check for invalid host-nodes and policies and give more verbose
* error messages than mbind(). */
assert(sizeof(backend->host_nodes) >=
BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
assert(maxnode <= MAX_NODES);
- if (mbind(ptr, sz, backend->policy,
- maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
+
+#ifdef HAVE_NUMA_HAS_PREFERRED_MANY
+ if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
+ /*
+ * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
+ * silently picks the first node.
+ */
+ mode = MPOL_PREFERRED_MANY;
+ }
+#endif
+
+ if (maxnode &&
+ mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
error_setg_errno(errp, errno,
"cannot bind memory to host NUMA nodes");
* specified NUMA policy in place.
*/
if (backend->prealloc) {
- os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
- ms->smp.cpus, &local_err);
+ qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
+ backend->prealloc_threads,
+ backend->prealloc_context, &local_err);
if (local_err) {
goto out;
}
backend->share = value;
}
+#ifdef CONFIG_LINUX
+/*
+ * Property getter for "reserve": reports the backend's current reserve flag.
+ * @errp is never set.
+ */
+static bool host_memory_backend_get_reserve(Object *o, Error **errp)
+{
+    return MEMORY_BACKEND(o)->reserve;
+}
+
+/*
+ * Property setter for "reserve".
+ *
+ * Refuses the change (error in @errp, flag untouched) when the backend's
+ * memory region is already initialized, or when the request would turn
+ * reserve off while prealloc is on. Otherwise stores @value.
+ */
+static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
+{
+    HostMemoryBackend *memdev = MEMORY_BACKEND(o);
+
+    if (host_memory_backend_mr_inited(memdev)) {
+        error_setg(errp, "cannot change property value");
+    } else if (!value && memdev->prealloc) {
+        error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+    } else {
+        memdev->reserve = value;
+    }
+}
+#endif /* CONFIG_LINUX */
+
static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
object_class_property_add_bool(oc, "merge",
host_memory_backend_get_merge,
- host_memory_backend_set_merge, &error_abort);
+ host_memory_backend_set_merge);
object_class_property_set_description(oc, "merge",
- "Mark memory as mergeable", &error_abort);
+ "Mark memory as mergeable");
object_class_property_add_bool(oc, "dump",
host_memory_backend_get_dump,
- host_memory_backend_set_dump, &error_abort);
+ host_memory_backend_set_dump);
object_class_property_set_description(oc, "dump",
- "Set to 'off' to exclude from core dump", &error_abort);
+ "Set to 'off' to exclude from core dump");
object_class_property_add_bool(oc, "prealloc",
host_memory_backend_get_prealloc,
- host_memory_backend_set_prealloc, &error_abort);
+ host_memory_backend_set_prealloc);
object_class_property_set_description(oc, "prealloc",
- "Preallocate memory", &error_abort);
+ "Preallocate memory");
+ object_class_property_add(oc, "prealloc-threads", "int",
+ host_memory_backend_get_prealloc_threads,
+ host_memory_backend_set_prealloc_threads,
+ NULL, NULL);
+ object_class_property_set_description(oc, "prealloc-threads",
+ "Number of CPU threads to use for prealloc");
+ object_class_property_add_link(oc, "prealloc-context",
+ TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
+ object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
+ object_class_property_set_description(oc, "prealloc-context",
+ "Context to use for creating CPU threads for preallocation");
object_class_property_add(oc, "size", "int",
host_memory_backend_get_size,
host_memory_backend_set_size,
- NULL, NULL, &error_abort);
+ NULL, NULL);
object_class_property_set_description(oc, "size",
- "Size of the memory region (ex: 500M)", &error_abort);
+ "Size of the memory region (ex: 500M)");
object_class_property_add(oc, "host-nodes", "int",
host_memory_backend_get_host_nodes,
host_memory_backend_set_host_nodes,
- NULL, NULL, &error_abort);
+ NULL, NULL);
object_class_property_set_description(oc, "host-nodes",
- "Binds memory to the list of NUMA host nodes", &error_abort);
+ "Binds memory to the list of NUMA host nodes");
object_class_property_add_enum(oc, "policy", "HostMemPolicy",
&HostMemPolicy_lookup,
host_memory_backend_get_policy,
- host_memory_backend_set_policy, &error_abort);
+ host_memory_backend_set_policy);
object_class_property_set_description(oc, "policy",
- "Set the NUMA policy", &error_abort);
+ "Set the NUMA policy");
object_class_property_add_bool(oc, "share",
- host_memory_backend_get_share, host_memory_backend_set_share,
- &error_abort);
+ host_memory_backend_get_share, host_memory_backend_set_share);
object_class_property_set_description(oc, "share",
- "Mark the memory as private to QEMU or shared", &error_abort);
+ "Mark the memory as private to QEMU or shared");
+#ifdef CONFIG_LINUX
+ object_class_property_add_bool(oc, "reserve",
+ host_memory_backend_get_reserve, host_memory_backend_set_reserve);
+ object_class_property_set_description(oc, "reserve",
+ "Reserve swap space (or huge pages) if applicable");
+#endif /* CONFIG_LINUX */
+ /*
+ * Do not delete/rename this option. This option must be considered stable
+ * (as if it didn't have the 'x-' prefix including deprecation period) as
+ * long as 4.0 and older machine types exist.
+ * Option will be used by upper layers to override (disable) canonical path
+ * for ramblock-id set by compat properties on old machine types ( <= 4.0),
+ * to keep migration working when backend is used for main RAM with
+ * -machine memory-backend= option (main RAM historically used prefix-less
+ * ramblock-id).
+ */
object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
host_memory_backend_get_use_canonical_path,
- host_memory_backend_set_use_canonical_path, &error_abort);
+ host_memory_backend_set_use_canonical_path);
}
static const TypeInfo host_memory_backend_info = {