]> git.proxmox.com Git - mirror_qemu.git/blob - hw/mem/memory-device.c
Merge tag 'pull-tcg-20231114' of https://gitlab.com/rth7680/qemu into staging
[mirror_qemu.git] / hw / mem / memory-device.c
1 /*
2 * Memory Device Interface
3 *
4 * Copyright ProfitBricks GmbH 2012
5 * Copyright (C) 2014 Red Hat Inc
6 * Copyright (c) 2018 Red Hat Inc
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/error-report.h"
14 #include "hw/mem/memory-device.h"
15 #include "qapi/error.h"
16 #include "hw/boards.h"
17 #include "qemu/range.h"
18 #include "hw/virtio/vhost.h"
19 #include "sysemu/kvm.h"
20 #include "exec/address-spaces.h"
21 #include "trace.h"
22
23 static bool memory_device_is_empty(const MemoryDeviceState *md)
24 {
25 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
26 Error *local_err = NULL;
27 MemoryRegion *mr;
28
29 /* dropping const here is fine as we don't touch the memory region */
30 mr = mdc->get_memory_region((MemoryDeviceState *)md, &local_err);
31 if (local_err) {
32 /* Not empty, we'll report errors later when ontaining the MR again. */
33 error_free(local_err);
34 return false;
35 }
36 return !mr;
37 }
38
39 static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
40 {
41 const MemoryDeviceState *md_a = MEMORY_DEVICE(a);
42 const MemoryDeviceState *md_b = MEMORY_DEVICE(b);
43 const MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a);
44 const MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b);
45 const uint64_t addr_a = mdc_a->get_addr(md_a);
46 const uint64_t addr_b = mdc_b->get_addr(md_b);
47
48 if (addr_a > addr_b) {
49 return 1;
50 } else if (addr_a < addr_b) {
51 return -1;
52 }
53 return 0;
54 }
55
56 static int memory_device_build_list(Object *obj, void *opaque)
57 {
58 GSList **list = opaque;
59
60 if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) {
61 DeviceState *dev = DEVICE(obj);
62 if (dev->realized) { /* only realized memory devices matter */
63 *list = g_slist_insert_sorted(*list, dev, memory_device_addr_sort);
64 }
65 }
66
67 object_child_foreach(obj, memory_device_build_list, opaque);
68 return 0;
69 }
70
71 static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
72 {
73 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
74
75 if (mdc->get_memslots) {
76 return mdc->get_memslots(md);
77 }
78 return 1;
79 }
80
81 /*
82 * Memslots that are reserved by memory devices (required but still reported
83 * as free from KVM / vhost).
84 */
85 static unsigned int get_reserved_memslots(MachineState *ms)
86 {
87 if (ms->device_memory->used_memslots >
88 ms->device_memory->required_memslots) {
89 /* This is unexpected, and we warned already in the memory notifier. */
90 return 0;
91 }
92 return ms->device_memory->required_memslots -
93 ms->device_memory->used_memslots;
94 }
95
96 unsigned int memory_devices_get_reserved_memslots(void)
97 {
98 if (!current_machine->device_memory) {
99 return 0;
100 }
101 return get_reserved_memslots(current_machine);
102 }
103
104 bool memory_devices_memslot_auto_decision_active(void)
105 {
106 if (!current_machine->device_memory) {
107 return false;
108 }
109
110 return current_machine->device_memory->memslot_auto_decision_active;
111 }
112
113 static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
114 MemoryRegion *mr)
115 {
116 const unsigned int reserved = get_reserved_memslots(ms);
117 const uint64_t size = memory_region_size(mr);
118 unsigned int max = vhost_get_max_memslots();
119 unsigned int free = vhost_get_free_memslots();
120 uint64_t available_space;
121 unsigned int memslots;
122
123 if (kvm_enabled()) {
124 max = MIN(max, kvm_get_max_memslots());
125 free = MIN(free, kvm_get_free_memslots());
126 }
127
128 /*
129 * If we only have less overall memslots than what we consider reasonable,
130 * just keep it to a minimum.
131 */
132 if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
133 return 1;
134 }
135
136 /*
137 * Consider our soft-limit across all memory devices. We don't really
138 * expect to exceed this limit in reasonable configurations.
139 */
140 if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
141 ms->device_memory->required_memslots) {
142 return 1;
143 }
144 memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
145 ms->device_memory->required_memslots;
146
147 /*
148 * Consider the actually still free memslots. This is only relevant if
149 * other memslot consumers would consume *significantly* more memslots than
150 * what we prepared for (> 253). Unlikely, but let's just handle it
151 * cleanly.
152 */
153 memslots = MIN(memslots, free - reserved);
154 if (memslots < 1 || unlikely(free < reserved)) {
155 return 1;
156 }
157
158 /* We cannot have any other memory devices? So give all to this device. */
159 if (size == ms->maxram_size - ms->ram_size) {
160 return memslots;
161 }
162
163 /*
164 * Simple heuristic: equally distribute the memslots over the space
165 * still available for memory devices.
166 */
167 available_space = ms->maxram_size - ms->ram_size -
168 ms->device_memory->used_region_size;
169 memslots = (double)memslots * size / available_space;
170 return memslots < 1 ? 1 : memslots;
171 }
172
173 static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
174 MemoryRegion *mr, Error **errp)
175 {
176 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
177 const uint64_t used_region_size = ms->device_memory->used_region_size;
178 const uint64_t size = memory_region_size(mr);
179 const unsigned int reserved_memslots = get_reserved_memslots(ms);
180 unsigned int required_memslots, memslot_limit;
181
182 /*
183 * Instruct the device to decide how many memslots to use, if applicable,
184 * before we query the number of required memslots the first time.
185 */
186 if (mdc->decide_memslots) {
187 memslot_limit = memory_device_memslot_decision_limit(ms, mr);
188 mdc->decide_memslots(md, memslot_limit);
189 }
190 required_memslots = memory_device_get_memslots(md);
191
192 /* we will need memory slots for kvm and vhost */
193 if (kvm_enabled() &&
194 kvm_get_free_memslots() < required_memslots + reserved_memslots) {
195 error_setg(errp, "hypervisor has not enough free memory slots left");
196 return;
197 }
198 if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
199 error_setg(errp, "a used vhost backend has not enough free memory slots left");
200 return;
201 }
202
203 /* will we exceed the total amount of memory specified */
204 if (used_region_size + size < used_region_size ||
205 used_region_size + size > ms->maxram_size - ms->ram_size) {
206 error_setg(errp, "not enough space, currently 0x%" PRIx64
207 " in use of total space for memory devices 0x" RAM_ADDR_FMT,
208 used_region_size, ms->maxram_size - ms->ram_size);
209 return;
210 }
211
212 }
213
214 static uint64_t memory_device_get_free_addr(MachineState *ms,
215 const uint64_t *hint,
216 uint64_t align, uint64_t size,
217 Error **errp)
218 {
219 GSList *list = NULL, *item;
220 Range as, new = range_empty;
221
222 range_init_nofail(&as, ms->device_memory->base,
223 memory_region_size(&ms->device_memory->mr));
224
225 /* start of address space indicates the maximum alignment we expect */
226 if (!QEMU_IS_ALIGNED(range_lob(&as), align)) {
227 warn_report("the alignment (0x%" PRIx64 ") exceeds the expected"
228 " maximum alignment, memory will get fragmented and not"
229 " all 'maxmem' might be usable for memory devices.",
230 align);
231 }
232
233 if (hint && !QEMU_IS_ALIGNED(*hint, align)) {
234 error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes",
235 align);
236 return 0;
237 }
238
239 if (hint) {
240 if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) {
241 error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
242 "], usable range for memory devices [0x%" PRIx64 ":0x%"
243 PRIx64 "]", *hint, size, range_lob(&as),
244 range_size(&as));
245 return 0;
246 }
247 } else {
248 if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) {
249 error_setg(errp, "can't add memory device, device too big");
250 return 0;
251 }
252 }
253
254 /* find address range that will fit new memory device */
255 object_child_foreach(OBJECT(ms), memory_device_build_list, &list);
256 for (item = list; item; item = g_slist_next(item)) {
257 const MemoryDeviceState *md = item->data;
258 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(OBJECT(md));
259 uint64_t next_addr;
260 Range tmp;
261
262 if (memory_device_is_empty(md)) {
263 continue;
264 }
265
266 range_init_nofail(&tmp, mdc->get_addr(md),
267 memory_device_get_region_size(md, &error_abort));
268
269 if (range_overlaps_range(&tmp, &new)) {
270 if (hint) {
271 const DeviceState *d = DEVICE(md);
272 error_setg(errp, "address range conflicts with memory device"
273 " id='%s'", d->id ? d->id : "(unnamed)");
274 goto out;
275 }
276
277 next_addr = QEMU_ALIGN_UP(range_upb(&tmp) + 1, align);
278 if (!next_addr || range_init(&new, next_addr, range_size(&new))) {
279 range_make_empty(&new);
280 break;
281 }
282 } else if (range_lob(&tmp) > range_upb(&new)) {
283 break;
284 }
285 }
286
287 if (!range_contains_range(&as, &new)) {
288 error_setg(errp, "could not find position in guest address space for "
289 "memory device - memory fragmented due to alignments");
290 }
291 out:
292 g_slist_free(list);
293 return range_lob(&new);
294 }
295
296 MemoryDeviceInfoList *qmp_memory_device_list(void)
297 {
298 GSList *devices = NULL, *item;
299 MemoryDeviceInfoList *list = NULL, **tail = &list;
300
301 object_child_foreach(qdev_get_machine(), memory_device_build_list,
302 &devices);
303
304 for (item = devices; item; item = g_slist_next(item)) {
305 const MemoryDeviceState *md = MEMORY_DEVICE(item->data);
306 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data);
307 MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
308
309 /* Let's query infotmation even for empty memory devices. */
310 mdc->fill_device_info(md, info);
311
312 QAPI_LIST_APPEND(tail, info);
313 }
314
315 g_slist_free(devices);
316
317 return list;
318 }
319
320 static int memory_device_plugged_size(Object *obj, void *opaque)
321 {
322 uint64_t *size = opaque;
323
324 if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) {
325 const DeviceState *dev = DEVICE(obj);
326 const MemoryDeviceState *md = MEMORY_DEVICE(obj);
327 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
328
329 if (dev->realized && !memory_device_is_empty(md)) {
330 *size += mdc->get_plugged_size(md, &error_abort);
331 }
332 }
333
334 object_child_foreach(obj, memory_device_plugged_size, opaque);
335 return 0;
336 }
337
338 uint64_t get_plugged_memory_size(void)
339 {
340 uint64_t size = 0;
341
342 memory_device_plugged_size(qdev_get_machine(), &size);
343
344 return size;
345 }
346
347 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
348 const uint64_t *legacy_align, Error **errp)
349 {
350 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
351 Error *local_err = NULL;
352 uint64_t addr, align = 0;
353 MemoryRegion *mr;
354
355 /* We support empty memory devices even without device memory. */
356 if (memory_device_is_empty(md)) {
357 return;
358 }
359
360 if (!ms->device_memory) {
361 error_setg(errp, "the configuration is not prepared for memory devices"
362 " (e.g., for memory hotplug), consider specifying the"
363 " maxmem option");
364 return;
365 }
366
367 mr = mdc->get_memory_region(md, &local_err);
368 if (local_err) {
369 goto out;
370 }
371
372 memory_device_check_addable(ms, md, mr, &local_err);
373 if (local_err) {
374 goto out;
375 }
376
377 if (legacy_align) {
378 align = *legacy_align;
379 } else {
380 if (mdc->get_min_alignment) {
381 align = mdc->get_min_alignment(md);
382 }
383 align = MAX(align, memory_region_get_alignment(mr));
384 }
385 addr = mdc->get_addr(md);
386 addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align,
387 memory_region_size(mr), &local_err);
388 if (local_err) {
389 goto out;
390 }
391 mdc->set_addr(md, addr, &local_err);
392 if (!local_err) {
393 trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "",
394 addr);
395 }
396 out:
397 error_propagate(errp, local_err);
398 }
399
400 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
401 {
402 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
403 unsigned int memslots;
404 uint64_t addr;
405 MemoryRegion *mr;
406
407 if (memory_device_is_empty(md)) {
408 return;
409 }
410
411 memslots = memory_device_get_memslots(md);
412 addr = mdc->get_addr(md);
413
414 /*
415 * We expect that a previous call to memory_device_pre_plug() succeeded, so
416 * it can't fail at this point.
417 */
418 mr = mdc->get_memory_region(md, &error_abort);
419 g_assert(ms->device_memory);
420
421 ms->device_memory->used_region_size += memory_region_size(mr);
422 ms->device_memory->required_memslots += memslots;
423 if (mdc->decide_memslots && memslots > 1) {
424 ms->device_memory->memslot_auto_decision_active++;
425 }
426
427 memory_region_add_subregion(&ms->device_memory->mr,
428 addr - ms->device_memory->base, mr);
429 trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
430 }
431
432 void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
433 {
434 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
435 const unsigned int memslots = memory_device_get_memslots(md);
436 MemoryRegion *mr;
437
438 if (memory_device_is_empty(md)) {
439 return;
440 }
441
442 /*
443 * We expect that a previous call to memory_device_pre_plug() succeeded, so
444 * it can't fail at this point.
445 */
446 mr = mdc->get_memory_region(md, &error_abort);
447 g_assert(ms->device_memory);
448
449 memory_region_del_subregion(&ms->device_memory->mr, mr);
450
451 if (mdc->decide_memslots && memslots > 1) {
452 ms->device_memory->memslot_auto_decision_active--;
453 }
454 ms->device_memory->used_region_size -= memory_region_size(mr);
455 ms->device_memory->required_memslots -= memslots;
456 trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
457 mdc->get_addr(md));
458 }
459
460 uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
461 Error **errp)
462 {
463 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
464 MemoryRegion *mr;
465
466 /* dropping const here is fine as we don't touch the memory region */
467 mr = mdc->get_memory_region((MemoryDeviceState *)md, errp);
468 if (!mr) {
469 return 0;
470 }
471
472 return memory_region_size(mr);
473 }
474
475 static void memory_devices_region_mod(MemoryListener *listener,
476 MemoryRegionSection *mrs, bool add)
477 {
478 DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
479 listener);
480
481 if (!memory_region_is_ram(mrs->mr)) {
482 warn_report("Unexpected memory region mapped into device memory region.");
483 return;
484 }
485
486 /*
487 * The expectation is that each distinct RAM memory region section in
488 * our region for memory devices consumes exactly one memslot in KVM
489 * and in vhost. For vhost, this is true, except:
490 * * ROM memory regions don't consume a memslot. These get used very
491 * rarely for memory devices (R/O NVDIMMs).
492 * * Memslots without a fd (memory-backend-ram) don't necessarily
493 * consume a memslot. Such setups are quite rare and possibly bogus:
494 * the memory would be inaccessible by such vhost devices.
495 *
496 * So for vhost, in corner cases we might over-estimate the number of
497 * memslots that are currently used or that might still be reserved
498 * (required - used).
499 */
500 dms->used_memslots += add ? 1 : -1;
501
502 if (dms->used_memslots > dms->required_memslots) {
503 warn_report("Memory devices use more memory slots than indicated as required.");
504 }
505 }
506
507 static void memory_devices_region_add(MemoryListener *listener,
508 MemoryRegionSection *mrs)
509 {
510 return memory_devices_region_mod(listener, mrs, true);
511 }
512
513 static void memory_devices_region_del(MemoryListener *listener,
514 MemoryRegionSection *mrs)
515 {
516 return memory_devices_region_mod(listener, mrs, false);
517 }
518
519 void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
520 {
521 g_assert(size);
522 g_assert(!ms->device_memory);
523 ms->device_memory = g_new0(DeviceMemoryState, 1);
524 ms->device_memory->base = base;
525
526 memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
527 size);
528 address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
529 "device-memory");
530 memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
531 &ms->device_memory->mr);
532
533 /* Track the number of memslots used by memory devices. */
534 ms->device_memory->listener.region_add = memory_devices_region_add;
535 ms->device_memory->listener.region_del = memory_devices_region_del;
536 memory_listener_register(&ms->device_memory->listener,
537 &ms->device_memory->as);
538 }
539
540 static const TypeInfo memory_device_info = {
541 .name = TYPE_MEMORY_DEVICE,
542 .parent = TYPE_INTERFACE,
543 .class_size = sizeof(MemoryDeviceClass),
544 };
545
546 static void memory_device_register_types(void)
547 {
548 type_register_static(&memory_device_info);
549 }
550
551 type_init(memory_device_register_types)