]>
Commit | Line | Data |
---|---|---|
1f070489 IM |
1 | /* |
2 | * QEMU Host Memory Backend | |
3 | * | |
4 | * Copyright (C) 2013-2014 Red Hat Inc | |
5 | * | |
6 | * Authors: | |
7 | * Igor Mammedov <imammedo@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
12 | #include "sysemu/hostmem.h" | |
1f070489 | 13 | #include "qapi/visitor.h" |
4cf1b76b HT |
14 | #include "qapi-types.h" |
15 | #include "qapi-visit.h" | |
1f070489 IM |
16 | #include "qapi/qmp/qerror.h" |
17 | #include "qemu/config-file.h" | |
18 | #include "qom/object_interfaces.h" | |
19 | ||
4cf1b76b HT |
#ifdef CONFIG_NUMA
#include <numaif.h>
/*
 * backend->policy is handed to mbind() as-is, so the HostMemPolicy enum
 * values must stay numerically equal to the host's MPOL_* constants.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
#endif
27 | ||
1f070489 | 28 | static void |
58f4662c HT |
29 | host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque, |
30 | const char *name, Error **errp) | |
1f070489 IM |
31 | { |
32 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
33 | uint64_t value = backend->size; | |
34 | ||
35 | visit_type_size(v, &value, name, errp); | |
36 | } | |
37 | ||
38 | static void | |
58f4662c HT |
39 | host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque, |
40 | const char *name, Error **errp) | |
1f070489 IM |
41 | { |
42 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
43 | Error *local_err = NULL; | |
44 | uint64_t value; | |
45 | ||
46 | if (memory_region_size(&backend->mr)) { | |
47 | error_setg(&local_err, "cannot change property value"); | |
48 | goto out; | |
49 | } | |
50 | ||
51 | visit_type_size(v, &value, name, &local_err); | |
52 | if (local_err) { | |
53 | goto out; | |
54 | } | |
55 | if (!value) { | |
56 | error_setg(&local_err, "Property '%s.%s' doesn't take value '%" | |
57 | PRIu64 "'", object_get_typename(obj), name, value); | |
58 | goto out; | |
59 | } | |
60 | backend->size = value; | |
61 | out: | |
62 | error_propagate(errp, local_err); | |
63 | } | |
64 | ||
4cf1b76b HT |
65 | static void |
66 | host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque, | |
67 | const char *name, Error **errp) | |
68 | { | |
69 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
70 | uint16List *host_nodes = NULL; | |
71 | uint16List **node = &host_nodes; | |
72 | unsigned long value; | |
73 | ||
74 | value = find_first_bit(backend->host_nodes, MAX_NODES); | |
75 | if (value == MAX_NODES) { | |
76 | return; | |
77 | } | |
78 | ||
79 | *node = g_malloc0(sizeof(**node)); | |
80 | (*node)->value = value; | |
81 | node = &(*node)->next; | |
82 | ||
83 | do { | |
84 | value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); | |
85 | if (value == MAX_NODES) { | |
86 | break; | |
87 | } | |
88 | ||
89 | *node = g_malloc0(sizeof(**node)); | |
90 | (*node)->value = value; | |
91 | node = &(*node)->next; | |
92 | } while (true); | |
93 | ||
94 | visit_type_uint16List(v, &host_nodes, name, errp); | |
95 | } | |
96 | ||
97 | static void | |
98 | host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque, | |
99 | const char *name, Error **errp) | |
100 | { | |
101 | #ifdef CONFIG_NUMA | |
102 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
103 | uint16List *l = NULL; | |
104 | ||
105 | visit_type_uint16List(v, &l, name, errp); | |
106 | ||
107 | while (l) { | |
108 | bitmap_set(backend->host_nodes, l->value, 1); | |
109 | l = l->next; | |
110 | } | |
111 | #else | |
112 | error_setg(errp, "NUMA node binding are not supported by this QEMU"); | |
113 | #endif | |
114 | } | |
115 | ||
116 | static void | |
117 | host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque, | |
118 | const char *name, Error **errp) | |
119 | { | |
120 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
121 | int policy = backend->policy; | |
122 | ||
123 | visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); | |
124 | } | |
125 | ||
126 | static void | |
127 | host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque, | |
128 | const char *name, Error **errp) | |
129 | { | |
130 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
131 | int policy; | |
132 | ||
133 | visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); | |
134 | backend->policy = policy; | |
135 | ||
136 | #ifndef CONFIG_NUMA | |
137 | if (policy != HOST_MEM_POLICY_DEFAULT) { | |
138 | error_setg(errp, "NUMA policies are not supported by this QEMU"); | |
139 | } | |
140 | #endif | |
141 | } | |
142 | ||
605d0a94 PB |
143 | static bool host_memory_backend_get_merge(Object *obj, Error **errp) |
144 | { | |
145 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
146 | ||
147 | return backend->merge; | |
148 | } | |
149 | ||
150 | static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) | |
151 | { | |
152 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
153 | ||
154 | if (!memory_region_size(&backend->mr)) { | |
155 | backend->merge = value; | |
156 | return; | |
157 | } | |
158 | ||
159 | if (value != backend->merge) { | |
160 | void *ptr = memory_region_get_ram_ptr(&backend->mr); | |
161 | uint64_t sz = memory_region_size(&backend->mr); | |
162 | ||
163 | qemu_madvise(ptr, sz, | |
164 | value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); | |
165 | backend->merge = value; | |
166 | } | |
167 | } | |
168 | ||
169 | static bool host_memory_backend_get_dump(Object *obj, Error **errp) | |
170 | { | |
171 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
172 | ||
173 | return backend->dump; | |
174 | } | |
175 | ||
176 | static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) | |
177 | { | |
178 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
179 | ||
180 | if (!memory_region_size(&backend->mr)) { | |
181 | backend->dump = value; | |
182 | return; | |
183 | } | |
184 | ||
185 | if (value != backend->dump) { | |
186 | void *ptr = memory_region_get_ram_ptr(&backend->mr); | |
187 | uint64_t sz = memory_region_size(&backend->mr); | |
188 | ||
189 | qemu_madvise(ptr, sz, | |
190 | value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); | |
191 | backend->dump = value; | |
192 | } | |
193 | } | |
194 | ||
a35ba7be PB |
195 | static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) |
196 | { | |
197 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
198 | ||
199 | return backend->prealloc || backend->force_prealloc; | |
200 | } | |
201 | ||
202 | static void host_memory_backend_set_prealloc(Object *obj, bool value, | |
203 | Error **errp) | |
204 | { | |
205 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); | |
206 | ||
207 | if (backend->force_prealloc) { | |
208 | if (value) { | |
209 | error_setg(errp, | |
210 | "remove -mem-prealloc to use the prealloc property"); | |
211 | return; | |
212 | } | |
213 | } | |
214 | ||
215 | if (!memory_region_size(&backend->mr)) { | |
216 | backend->prealloc = value; | |
217 | return; | |
218 | } | |
219 | ||
220 | if (value && !backend->prealloc) { | |
221 | int fd = memory_region_get_fd(&backend->mr); | |
222 | void *ptr = memory_region_get_ram_ptr(&backend->mr); | |
223 | uint64_t sz = memory_region_size(&backend->mr); | |
224 | ||
225 | os_mem_prealloc(fd, ptr, sz); | |
226 | backend->prealloc = true; | |
227 | } | |
228 | } | |
229 | ||
58f4662c | 230 | static void host_memory_backend_init(Object *obj) |
1f070489 | 231 | { |
605d0a94 PB |
232 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
233 | ||
234 | backend->merge = qemu_opt_get_bool(qemu_get_machine_opts(), | |
235 | "mem-merge", true); | |
236 | backend->dump = qemu_opt_get_bool(qemu_get_machine_opts(), | |
237 | "dump-guest-core", true); | |
a35ba7be | 238 | backend->prealloc = mem_prealloc; |
605d0a94 PB |
239 | |
240 | object_property_add_bool(obj, "merge", | |
241 | host_memory_backend_get_merge, | |
242 | host_memory_backend_set_merge, NULL); | |
243 | object_property_add_bool(obj, "dump", | |
244 | host_memory_backend_get_dump, | |
245 | host_memory_backend_set_dump, NULL); | |
a35ba7be PB |
246 | object_property_add_bool(obj, "prealloc", |
247 | host_memory_backend_get_prealloc, | |
248 | host_memory_backend_set_prealloc, NULL); | |
1f070489 | 249 | object_property_add(obj, "size", "int", |
58f4662c HT |
250 | host_memory_backend_get_size, |
251 | host_memory_backend_set_size, NULL, NULL, NULL); | |
4cf1b76b HT |
252 | object_property_add(obj, "host-nodes", "int", |
253 | host_memory_backend_get_host_nodes, | |
254 | host_memory_backend_set_host_nodes, NULL, NULL, NULL); | |
255 | object_property_add(obj, "policy", "str", | |
256 | host_memory_backend_get_policy, | |
257 | host_memory_backend_set_policy, NULL, NULL, NULL); | |
1f070489 IM |
258 | } |
259 | ||
1f070489 IM |
260 | MemoryRegion * |
261 | host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) | |
262 | { | |
263 | return memory_region_size(&backend->mr) ? &backend->mr : NULL; | |
264 | } | |
265 | ||
bd9262d9 HT |
266 | static void |
267 | host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) | |
268 | { | |
269 | HostMemoryBackend *backend = MEMORY_BACKEND(uc); | |
270 | HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); | |
605d0a94 PB |
271 | Error *local_err = NULL; |
272 | void *ptr; | |
273 | uint64_t sz; | |
bd9262d9 HT |
274 | |
275 | if (bc->alloc) { | |
605d0a94 PB |
276 | bc->alloc(backend, &local_err); |
277 | if (local_err) { | |
278 | error_propagate(errp, local_err); | |
279 | return; | |
280 | } | |
281 | ||
282 | ptr = memory_region_get_ram_ptr(&backend->mr); | |
283 | sz = memory_region_size(&backend->mr); | |
284 | ||
285 | if (backend->merge) { | |
286 | qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); | |
287 | } | |
288 | if (!backend->dump) { | |
289 | qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); | |
290 | } | |
4cf1b76b HT |
291 | #ifdef CONFIG_NUMA |
292 | unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); | |
293 | /* lastbit == MAX_NODES means maxnode = 0 */ | |
294 | unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); | |
295 | /* ensure policy won't be ignored in case memory is preallocated | |
296 | * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so | |
297 | * this doesn't catch hugepage case. */ | |
288d3322 | 298 | unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; |
4cf1b76b HT |
299 | |
300 | /* check for invalid host-nodes and policies and give more verbose | |
301 | * error messages than mbind(). */ | |
302 | if (maxnode && backend->policy == MPOL_DEFAULT) { | |
303 | error_setg(errp, "host-nodes must be empty for policy default," | |
304 | " or you should explicitly specify a policy other" | |
305 | " than default"); | |
306 | return; | |
307 | } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { | |
308 | error_setg(errp, "host-nodes must be set for policy %s", | |
309 | HostMemPolicy_lookup[backend->policy]); | |
310 | return; | |
311 | } | |
312 | ||
313 | /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 | |
314 | * as argument to mbind() due to an old Linux bug (feature?) which | |
315 | * cuts off the last specified node. This means backend->host_nodes | |
316 | * must have MAX_NODES+1 bits available. | |
317 | */ | |
318 | assert(sizeof(backend->host_nodes) >= | |
319 | BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); | |
320 | assert(maxnode <= MAX_NODES); | |
321 | if (mbind(ptr, sz, backend->policy, | |
322 | maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { | |
323 | error_setg_errno(errp, errno, | |
324 | "cannot bind memory to host NUMA nodes"); | |
325 | return; | |
326 | } | |
327 | #endif | |
328 | /* Preallocate memory after the NUMA policy has been instantiated. | |
329 | * This is necessary to guarantee memory is allocated with | |
330 | * specified NUMA policy in place. | |
331 | */ | |
a35ba7be PB |
332 | if (backend->prealloc) { |
333 | os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); | |
334 | } | |
bd9262d9 HT |
335 | } |
336 | } | |
337 | ||
36bce5ca LM |
338 | static bool |
339 | host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp) | |
340 | { | |
341 | MemoryRegion *mr; | |
342 | ||
343 | mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp); | |
344 | if (memory_region_is_mapped(mr)) { | |
345 | return false; | |
346 | } else { | |
347 | return true; | |
348 | } | |
349 | } | |
350 | ||
bd9262d9 HT |
351 | static void |
352 | host_memory_backend_class_init(ObjectClass *oc, void *data) | |
353 | { | |
354 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); | |
355 | ||
356 | ucc->complete = host_memory_backend_memory_complete; | |
36bce5ca | 357 | ucc->can_be_deleted = host_memory_backend_can_be_deleted; |
bd9262d9 HT |
358 | } |
359 | ||
58f4662c | 360 | static const TypeInfo host_memory_backend_info = { |
1f070489 IM |
361 | .name = TYPE_MEMORY_BACKEND, |
362 | .parent = TYPE_OBJECT, | |
363 | .abstract = true, | |
364 | .class_size = sizeof(HostMemoryBackendClass), | |
bd9262d9 | 365 | .class_init = host_memory_backend_class_init, |
1f070489 | 366 | .instance_size = sizeof(HostMemoryBackend), |
58f4662c | 367 | .instance_init = host_memory_backend_init, |
1f070489 IM |
368 | .interfaces = (InterfaceInfo[]) { |
369 | { TYPE_USER_CREATABLE }, | |
370 | { } | |
371 | } | |
372 | }; | |
373 | ||
374 | static void register_types(void) | |
375 | { | |
58f4662c | 376 | type_register_static(&host_memory_backend_info); |
1f070489 IM |
377 | } |
378 | ||
379 | type_init(register_types); |