#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
-#include "sysemu/sysemu.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
+#include "qemu/madvise.h"
#ifdef CONFIG_NUMA
#include <numaif.h>
+#include <numa.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
+/*
+ * HOST_MEM_POLICY_PREFERRED may either translate to MPOL_PREFERRED or
+ * MPOL_PREFERRED_MANY, see comments further below.
+ */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
host_memory_backend_get_name(HostMemoryBackend *backend)
{
if (!backend->use_canonical_path) {
- return object_get_canonical_path_component(OBJECT(backend));
+ return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
}
return object_get_canonical_path(OBJECT(backend));
void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- Error *local_err = NULL;
uint64_t value;
if (host_memory_backend_mr_inited(backend)) {
- error_setg(&local_err, "cannot change property %s of %s ",
- name, object_get_typename(obj));
- goto out;
+ error_setg(errp, "cannot change property %s of %s ", name,
+ object_get_typename(obj));
+ return;
}
- if (!visit_type_size(v, name, &value, &local_err)) {
- goto out;
+ if (!visit_type_size(v, name, &value, errp)) {
+ return;
}
if (!value) {
- error_setg(&local_err,
+ error_setg(errp,
"property '%s' of %s doesn't take value '%" PRIu64 "'",
name, object_get_typename(obj), value);
- goto out;
+ return;
}
backend->size = value;
-out:
- error_propagate(errp, local_err);
}
static void
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
uint16List *host_nodes = NULL;
- uint16List **node = &host_nodes;
+ uint16List **tail = &host_nodes;
unsigned long value;
value = find_first_bit(backend->host_nodes, MAX_NODES);
goto ret;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ QAPI_LIST_APPEND(tail, value);
do {
value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
break;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ QAPI_LIST_APPEND(tail, value);
} while (true);
ret:
visit_type_uint16List(v, name, &host_nodes, errp);
+ qapi_free_uint16List(host_nodes);
}
static void
static void host_memory_backend_set_prealloc(Object *obj, bool value,
Error **errp)
{
- Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+ if (!backend->reserve && value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
+ }
+
if (!host_memory_backend_mr_inited(backend)) {
backend->prealloc = value;
return;
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
- os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
+ backend->prealloc_context, errp)) {
return;
}
backend->prealloc = true;
const char *name, void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- Error *local_err = NULL;
uint32_t value;
- if (!visit_type_uint32(v, name, &value, &local_err)) {
- goto out;
+ if (!visit_type_uint32(v, name, &value, errp)) {
+ return;
}
if (value <= 0) {
- error_setg(&local_err,
- "property '%s' of %s doesn't take value '%d'",
- name, object_get_typename(obj), value);
- goto out;
+ error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
+ object_get_typename(obj), value);
+ return;
}
backend->prealloc_threads = value;
-out:
- error_propagate(errp, local_err);
}
static void host_memory_backend_init(Object *obj)
/* TODO: convert access to globals to compat properties */
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
- backend->prealloc_threads = 1;
+ backend->reserve = true;
+ backend->prealloc_threads = machine->smp.cpus;
}
static void host_memory_backend_post_init(Object *obj)
return backend->is_mapped;
}
-#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
- Object *obj = OBJECT(memdev);
- char *path = object_property_get_str(obj, "mem-path", NULL);
- size_t pagesize = qemu_mempath_getpagesize(path);
-
- g_free(path);
+ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
+ g_assert(pagesize >= qemu_real_host_page_size());
return pagesize;
}
-#else
-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
-{
- return qemu_real_host_page_size;
-}
-#endif
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(uc);
HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
- Error *local_err = NULL;
void *ptr;
uint64_t sz;
- if (bc->alloc) {
- bc->alloc(backend, &local_err);
- if (local_err) {
- goto out;
- }
+ if (!bc->alloc) {
+ return;
+ }
+ if (!bc->alloc(backend, errp)) {
+ return;
+ }
- ptr = memory_region_get_ram_ptr(&backend->mr);
- sz = memory_region_size(&backend->mr);
+ ptr = memory_region_get_ram_ptr(&backend->mr);
+ sz = memory_region_size(&backend->mr);
- if (backend->merge) {
- qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
- }
- if (!backend->dump) {
- qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
- }
+ if (backend->merge) {
+ qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
+ }
+ if (!backend->dump) {
+ qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
+ }
#ifdef CONFIG_NUMA
- unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
- /* lastbit == MAX_NODES means maxnode = 0 */
- unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
- /* ensure policy won't be ignored in case memory is preallocated
- * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
- * this doesn't catch hugepage case. */
- unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
-
- /* check for invalid host-nodes and policies and give more verbose
- * error messages than mbind(). */
- if (maxnode && backend->policy == MPOL_DEFAULT) {
- error_setg(errp, "host-nodes must be empty for policy default,"
- " or you should explicitly specify a policy other"
- " than default");
- return;
- } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
- error_setg(errp, "host-nodes must be set for policy %s",
- HostMemPolicy_str(backend->policy));
- return;
- }
+ unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
+ /* lastbit == MAX_NODES means maxnode = 0 */
+ unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
+ /*
+ * Ensure policy won't be ignored in case memory is preallocated
+ * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
+ * this doesn't catch hugepage case.
+ */
+ unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
+ int mode = backend->policy;
+
+ /* check for invalid host-nodes and policies and give more verbose
+ * error messages than mbind(). */
+ if (maxnode && backend->policy == MPOL_DEFAULT) {
+ error_setg(errp, "host-nodes must be empty for policy default,"
+ " or you should explicitly specify a policy other"
+ " than default");
+ return;
+ } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
+ error_setg(errp, "host-nodes must be set for policy %s",
+ HostMemPolicy_str(backend->policy));
+ return;
+ }
- /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
- * as argument to mbind() due to an old Linux bug (feature?) which
- * cuts off the last specified node. This means backend->host_nodes
- * must have MAX_NODES+1 bits available.
+ /*
+ * We can have up to MAX_NODES nodes, but we need to pass maxnode+1
+ * as argument to mbind() due to an old Linux bug (feature?) which
+ * cuts off the last specified node. This means backend->host_nodes
+ * must have MAX_NODES+1 bits available.
+ */
+ assert(sizeof(backend->host_nodes) >=
+ BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
+ assert(maxnode <= MAX_NODES);
+
+#ifdef HAVE_NUMA_HAS_PREFERRED_MANY
+ if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
+ /*
+ * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
+ * silently picks the first node.
*/
- assert(sizeof(backend->host_nodes) >=
- BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
- assert(maxnode <= MAX_NODES);
-
- if (maxnode &&
- mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
- flags)) {
- if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
- error_setg_errno(errp, errno,
- "cannot bind memory to host NUMA nodes");
- return;
- }
- }
+ mode = MPOL_PREFERRED_MANY;
+ }
#endif
- /* Preallocate memory after the NUMA policy has been instantiated.
- * This is necessary to guarantee memory is allocated with
- * specified NUMA policy in place.
- */
- if (backend->prealloc) {
- os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
- backend->prealloc_threads, &local_err);
- if (local_err) {
- goto out;
- }
+
+ if (maxnode &&
+ mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
+ if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
+ error_setg_errno(errp, errno,
+ "cannot bind memory to host NUMA nodes");
+ return;
}
}
-out:
- error_propagate(errp, local_err);
+#endif
+ /*
+ * Preallocate memory after the NUMA policy has been instantiated.
+ * This is necessary to guarantee memory is allocated with
+ * specified NUMA policy in place.
+ */
+ if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
+ ptr, sz,
+ backend->prealloc_threads,
+ backend->prealloc_context, errp)) {
+ return;
+ }
}
static bool
backend->share = value;
}
+#ifdef CONFIG_LINUX
+static bool host_memory_backend_get_reserve(Object *o, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+ return backend->reserve;
+}
+
+static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+ if (host_memory_backend_mr_inited(backend)) {
+ error_setg(errp, "cannot change property value");
+ return;
+ }
+ if (backend->prealloc && !value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
+ }
+ backend->reserve = value;
+}
+#endif /* CONFIG_LINUX */
+
static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
NULL, NULL);
object_class_property_set_description(oc, "prealloc-threads",
"Number of CPU threads to use for prealloc");
+ object_class_property_add_link(oc, "prealloc-context",
+ TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
+ object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
+ object_class_property_set_description(oc, "prealloc-context",
+ "Context to use for creating CPU threads for preallocation");
object_class_property_add(oc, "size", "int",
host_memory_backend_get_size,
host_memory_backend_set_size,
host_memory_backend_get_share, host_memory_backend_set_share);
object_class_property_set_description(oc, "share",
"Mark the memory as private to QEMU or shared");
+#ifdef CONFIG_LINUX
+ object_class_property_add_bool(oc, "reserve",
+ host_memory_backend_get_reserve, host_memory_backend_set_reserve);
+ object_class_property_set_description(oc, "reserve",
+ "Reserve swap space (or huge pages) if applicable");
+#endif /* CONFIG_LINUX */
+ /*
+ * Do not delete/rename option. This option must be considered stable
+ * (as if it didn't have the 'x-' prefix including deprecation period) as
+ * long as 4.0 and older machine types exists.
+ * Option will be used by upper layers to override (disable) canonical path
+ * for ramblock-id set by compat properties on old machine types ( <= 4.0),
+ * to keep migration working when backend is used for main RAM with
+ * -machine memory-backend= option (main RAM historically used prefix-less
+ * ramblock-id).
+ */
object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
host_memory_backend_get_use_canonical_path,
host_memory_backend_set_use_canonical_path);