#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
-#include "sysemu/sysemu.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
+#include "qemu/madvise.h"
#ifdef CONFIG_NUMA
#include <numaif.h>
+#include <numa.h>
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
+/*
+ * HOST_MEM_POLICY_PREFERRED may either translate to MPOL_PREFERRED or
+ * MPOL_PREFERRED_MANY, see comments further below.
+ */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
host_memory_backend_get_name(HostMemoryBackend *backend)
{
if (!backend->use_canonical_path) {
- return object_get_canonical_path_component(OBJECT(backend));
+ return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
}
return object_get_canonical_path(OBJECT(backend));
void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- Error *local_err = NULL;
uint64_t value;
if (host_memory_backend_mr_inited(backend)) {
return;
}
- if (!visit_type_size(v, name, &value, &local_err)) {
- error_propagate(errp, local_err);
+ if (!visit_type_size(v, name, &value, errp)) {
return;
}
if (!value) {
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
uint16List *host_nodes = NULL;
- uint16List **node = &host_nodes;
+ uint16List **tail = &host_nodes;
unsigned long value;
value = find_first_bit(backend->host_nodes, MAX_NODES);
goto ret;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ QAPI_LIST_APPEND(tail, value);
do {
value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
break;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ QAPI_LIST_APPEND(tail, value);
} while (true);
ret:
visit_type_uint16List(v, name, &host_nodes, errp);
+ qapi_free_uint16List(host_nodes);
}
static void
Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+ if (!backend->reserve && value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
+ }
+
if (!host_memory_backend_mr_inited(backend)) {
backend->prealloc = value;
return;
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
- os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
+ qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
+ backend->prealloc_context, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
const char *name, void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- Error *local_err = NULL;
uint32_t value;
- if (!visit_type_uint32(v, name, &value, &local_err)) {
- error_propagate(errp, local_err);
+ if (!visit_type_uint32(v, name, &value, errp)) {
return;
}
if (value <= 0) {
/* TODO: convert access to globals to compat properties */
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
- backend->prealloc_threads = 1;
+ backend->reserve = true;
+ backend->prealloc_threads = machine->smp.cpus;
}
static void host_memory_backend_post_init(Object *obj)
return backend->is_mapped;
}
-#ifdef __linux__
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
{
- Object *obj = OBJECT(memdev);
- char *path = object_property_get_str(obj, "mem-path", NULL);
- size_t pagesize = qemu_mempath_getpagesize(path);
-
- g_free(path);
+ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
+ g_assert(pagesize >= qemu_real_host_page_size());
return pagesize;
}
-#else
-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
-{
- return qemu_real_host_page_size;
-}
-#endif
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
* before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
* this doesn't catch hugepage case. */
unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
+ int mode = backend->policy;
/* check for invalid host-nodes and policies and give more verbose
* error messages than mbind(). */
BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
assert(maxnode <= MAX_NODES);
+#ifdef HAVE_NUMA_HAS_PREFERRED_MANY
+ if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
+ /*
+ * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
+ * silently picks the first node.
+ */
+ mode = MPOL_PREFERRED_MANY;
+ }
+#endif
+
if (maxnode &&
- mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
- flags)) {
+ mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
error_setg_errno(errp, errno,
"cannot bind memory to host NUMA nodes");
* specified NUMA policy in place.
*/
if (backend->prealloc) {
- os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
- backend->prealloc_threads, &local_err);
+ qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
+ backend->prealloc_threads,
+ backend->prealloc_context, &local_err);
if (local_err) {
goto out;
}
backend->share = value;
}
+#ifdef CONFIG_LINUX
+static bool host_memory_backend_get_reserve(Object *o, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+ return backend->reserve;
+}
+
+static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
+{
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
+
+ if (host_memory_backend_mr_inited(backend)) {
+ error_setg(errp, "cannot change property value");
+ return;
+ }
+ if (backend->prealloc && !value) {
+ error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
+ return;
+ }
+ backend->reserve = value;
+}
+#endif /* CONFIG_LINUX */
+
static bool
host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
{
NULL, NULL);
object_class_property_set_description(oc, "prealloc-threads",
"Number of CPU threads to use for prealloc");
+ object_class_property_add_link(oc, "prealloc-context",
+ TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
+ object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
+ object_class_property_set_description(oc, "prealloc-context",
+ "Context to use for creating CPU threads for preallocation");
object_class_property_add(oc, "size", "int",
host_memory_backend_get_size,
host_memory_backend_set_size,
host_memory_backend_get_share, host_memory_backend_set_share);
object_class_property_set_description(oc, "share",
"Mark the memory as private to QEMU or shared");
+#ifdef CONFIG_LINUX
+ object_class_property_add_bool(oc, "reserve",
+ host_memory_backend_get_reserve, host_memory_backend_set_reserve);
+ object_class_property_set_description(oc, "reserve",
+ "Reserve swap space (or huge pages) if applicable");
+#endif /* CONFIG_LINUX */
+ /*
+ * Do not delete/rename option. This option must be considered stable
+ * (as if it didn't have the 'x-' prefix including deprecation period) as
+ * long as 4.0 and older machine types exists.
+ * Option will be used by upper layers to override (disable) canonical path
+ * for ramblock-id set by compat properties on old machine types ( <= 4.0),
+ * to keep migration working when backend is used for main RAM with
+ * -machine memory-backend= option (main RAM historically used prefix-less
+ * ramblock-id).
+ */
object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
host_memory_backend_get_use_canonical_path,
host_memory_backend_set_use_canonical_path);