]> git.proxmox.com Git - mirror_qemu.git/blob - include/exec/ram_addr.h
ram: Split dirty bitmap by RAMBlock
[mirror_qemu.git] / include / exec / ram_addr.h
1 /*
2 * Declarations for cpu physical memory functions
3 *
4 * Copyright 2011 Red Hat, Inc. and/or its affiliates
5 *
6 * Authors:
7 * Avi Kivity <avi@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 *
12 */
13
14 /*
15 * This header is for use by exec.c and memory.c ONLY. Do not include it.
16 * The functions declared here will be removed soon.
17 */
18
19 #ifndef RAM_ADDR_H
20 #define RAM_ADDR_H
21
22 #ifndef CONFIG_USER_ONLY
23 #include "hw/xen/xen.h"
24 #include "exec/ramlist.h"
25
26 struct RAMBlock {
27 struct rcu_head rcu;
28 struct MemoryRegion *mr;
29 uint8_t *host;
30 ram_addr_t offset;
31 ram_addr_t used_length;
32 ram_addr_t max_length;
33 void (*resized)(const char*, uint64_t length, void *host);
34 uint32_t flags;
35 /* Protected by iothread lock. */
36 char idstr[256];
37 /* RCU-enabled, writes protected by the ramlist lock */
38 QLIST_ENTRY(RAMBlock) next;
39 QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
40 int fd;
41 size_t page_size;
42 /* dirty bitmap used during migration */
43 unsigned long *bmap;
44 /* bitmap of pages that haven't been sent even once
45 * only maintained and used in postcopy at the moment
46 * where it's used to send the dirtymap at the start
47 * of the postcopy phase
48 */
49 unsigned long *unsentmap;
50 };
51
52 static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
53 {
54 return (b && b->host && offset < b->used_length) ? true : false;
55 }
56
57 static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
58 {
59 assert(offset_in_ramblock(block, offset));
60 return (char *)block->host + offset;
61 }
62
63 long qemu_getrampagesize(void);
64 unsigned long last_ram_page(void);
65 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
66 bool share, const char *mem_path,
67 Error **errp);
68 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
69 MemoryRegion *mr, Error **errp);
70 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp);
71 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
72 void (*resized)(const char*,
73 uint64_t length,
74 void *host),
75 MemoryRegion *mr, Error **errp);
76 void qemu_ram_free(RAMBlock *block);
77
78 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);
79
/* Mask with bits 0 .. DIRTY_MEMORY_NUM - 1 set: one bit per dirty client. */
#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
/* DIRTY_CLIENTS_ALL with the DIRTY_MEMORY_CODE bit cleared. */
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
82
83 static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
84 ram_addr_t length,
85 unsigned client)
86 {
87 DirtyMemoryBlocks *blocks;
88 unsigned long end, page;
89 unsigned long idx, offset, base;
90 bool dirty = false;
91
92 assert(client < DIRTY_MEMORY_NUM);
93
94 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
95 page = start >> TARGET_PAGE_BITS;
96
97 rcu_read_lock();
98
99 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
100
101 idx = page / DIRTY_MEMORY_BLOCK_SIZE;
102 offset = page % DIRTY_MEMORY_BLOCK_SIZE;
103 base = page - offset;
104 while (page < end) {
105 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
106 unsigned long num = next - base;
107 unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
108 if (found < num) {
109 dirty = true;
110 break;
111 }
112
113 page = next;
114 idx++;
115 offset = 0;
116 base += DIRTY_MEMORY_BLOCK_SIZE;
117 }
118
119 rcu_read_unlock();
120
121 return dirty;
122 }
123
124 static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
125 ram_addr_t length,
126 unsigned client)
127 {
128 DirtyMemoryBlocks *blocks;
129 unsigned long end, page;
130 unsigned long idx, offset, base;
131 bool dirty = true;
132
133 assert(client < DIRTY_MEMORY_NUM);
134
135 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
136 page = start >> TARGET_PAGE_BITS;
137
138 rcu_read_lock();
139
140 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
141
142 idx = page / DIRTY_MEMORY_BLOCK_SIZE;
143 offset = page % DIRTY_MEMORY_BLOCK_SIZE;
144 base = page - offset;
145 while (page < end) {
146 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
147 unsigned long num = next - base;
148 unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
149 if (found < num) {
150 dirty = false;
151 break;
152 }
153
154 page = next;
155 idx++;
156 offset = 0;
157 base += DIRTY_MEMORY_BLOCK_SIZE;
158 }
159
160 rcu_read_unlock();
161
162 return dirty;
163 }
164
165 static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
166 unsigned client)
167 {
168 return cpu_physical_memory_get_dirty(addr, 1, client);
169 }
170
171 static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
172 {
173 bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
174 bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
175 bool migration =
176 cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
177 return !(vga && code && migration);
178 }
179
180 static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
181 ram_addr_t length,
182 uint8_t mask)
183 {
184 uint8_t ret = 0;
185
186 if (mask & (1 << DIRTY_MEMORY_VGA) &&
187 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
188 ret |= (1 << DIRTY_MEMORY_VGA);
189 }
190 if (mask & (1 << DIRTY_MEMORY_CODE) &&
191 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
192 ret |= (1 << DIRTY_MEMORY_CODE);
193 }
194 if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
195 !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
196 ret |= (1 << DIRTY_MEMORY_MIGRATION);
197 }
198 return ret;
199 }
200
201 static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
202 unsigned client)
203 {
204 unsigned long page, idx, offset;
205 DirtyMemoryBlocks *blocks;
206
207 assert(client < DIRTY_MEMORY_NUM);
208
209 page = addr >> TARGET_PAGE_BITS;
210 idx = page / DIRTY_MEMORY_BLOCK_SIZE;
211 offset = page % DIRTY_MEMORY_BLOCK_SIZE;
212
213 rcu_read_lock();
214
215 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
216
217 set_bit_atomic(offset, blocks->blocks[idx]);
218
219 rcu_read_unlock();
220 }
221
222 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
223 ram_addr_t length,
224 uint8_t mask)
225 {
226 DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
227 unsigned long end, page;
228 unsigned long idx, offset, base;
229 int i;
230
231 if (!mask && !xen_enabled()) {
232 return;
233 }
234
235 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
236 page = start >> TARGET_PAGE_BITS;
237
238 rcu_read_lock();
239
240 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
241 blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
242 }
243
244 idx = page / DIRTY_MEMORY_BLOCK_SIZE;
245 offset = page % DIRTY_MEMORY_BLOCK_SIZE;
246 base = page - offset;
247 while (page < end) {
248 unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
249
250 if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
251 bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
252 offset, next - page);
253 }
254 if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
255 bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
256 offset, next - page);
257 }
258 if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
259 bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
260 offset, next - page);
261 }
262
263 page = next;
264 idx++;
265 offset = 0;
266 base += DIRTY_MEMORY_BLOCK_SIZE;
267 }
268
269 rcu_read_unlock();
270
271 xen_hvm_modified_memory(start, length);
272 }
273
274 #if !defined(_WIN32)
275 static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
276 ram_addr_t start,
277 ram_addr_t pages)
278 {
279 unsigned long i, j;
280 unsigned long page_number, c;
281 hwaddr addr;
282 ram_addr_t ram_addr;
283 unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
284 unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
285 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
286
287 /* start address is aligned at the start of a word? */
288 if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
289 (hpratio == 1)) {
290 unsigned long **blocks[DIRTY_MEMORY_NUM];
291 unsigned long idx;
292 unsigned long offset;
293 long k;
294 long nr = BITS_TO_LONGS(pages);
295
296 idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
297 offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
298 DIRTY_MEMORY_BLOCK_SIZE);
299
300 rcu_read_lock();
301
302 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
303 blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
304 }
305
306 for (k = 0; k < nr; k++) {
307 if (bitmap[k]) {
308 unsigned long temp = leul_to_cpu(bitmap[k]);
309
310 atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp);
311 atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
312 if (tcg_enabled()) {
313 atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
314 }
315 }
316
317 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
318 offset = 0;
319 idx++;
320 }
321 }
322
323 rcu_read_unlock();
324
325 xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
326 } else {
327 uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
328 /*
329 * bitmap-traveling is faster than memory-traveling (for addr...)
330 * especially when most of the memory is not dirty.
331 */
332 for (i = 0; i < len; i++) {
333 if (bitmap[i] != 0) {
334 c = leul_to_cpu(bitmap[i]);
335 do {
336 j = ctzl(c);
337 c &= ~(1ul << j);
338 page_number = (i * HOST_LONG_BITS + j) * hpratio;
339 addr = page_number * TARGET_PAGE_SIZE;
340 ram_addr = start + addr;
341 cpu_physical_memory_set_dirty_range(ram_addr,
342 TARGET_PAGE_SIZE * hpratio, clients);
343 } while (c != 0);
344 }
345 }
346 }
347 }
348 #endif /* not _WIN32 */
349
350 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
351 ram_addr_t length,
352 unsigned client);
353
354 DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
355 (ram_addr_t start, ram_addr_t length, unsigned client);
356
357 bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
358 ram_addr_t start,
359 ram_addr_t length);
360
361 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
362 ram_addr_t length)
363 {
364 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
365 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
366 cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
367 }
368
369
370 static inline
371 uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
372 ram_addr_t start,
373 ram_addr_t length,
374 uint64_t *real_dirty_pages)
375 {
376 ram_addr_t addr;
377 unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
378 uint64_t num_dirty = 0;
379 unsigned long *dest = rb->bmap;
380
381 /* start address is aligned at the start of a word? */
382 if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
383 int k;
384 int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
385 unsigned long * const *src;
386 unsigned long idx = (page * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
387 unsigned long offset = BIT_WORD((page * BITS_PER_LONG) %
388 DIRTY_MEMORY_BLOCK_SIZE);
389
390 rcu_read_lock();
391
392 src = atomic_rcu_read(
393 &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;
394
395 for (k = page; k < page + nr; k++) {
396 if (src[idx][offset]) {
397 unsigned long bits = atomic_xchg(&src[idx][offset], 0);
398 unsigned long new_dirty;
399 *real_dirty_pages += ctpopl(bits);
400 new_dirty = ~dest[k];
401 dest[k] |= bits;
402 new_dirty &= bits;
403 num_dirty += ctpopl(new_dirty);
404 }
405
406 if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
407 offset = 0;
408 idx++;
409 }
410 }
411
412 rcu_read_unlock();
413 } else {
414 for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
415 if (cpu_physical_memory_test_and_clear_dirty(
416 start + addr,
417 TARGET_PAGE_SIZE,
418 DIRTY_MEMORY_MIGRATION)) {
419 *real_dirty_pages += 1;
420 long k = (start + addr) >> TARGET_PAGE_BITS;
421 if (!test_and_set_bit(k, dest)) {
422 num_dirty++;
423 }
424 }
425 }
426 }
427
428 return num_dirty;
429 }
430 #endif
431 #endif