/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu-common.h"
#include "elf.h"
#include "cpu.h"
#include "exec/cpu-all.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/sysemu.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/cpus.h"
#include "qapi/error.h"
#include "qmp-commands.h"

#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif

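/*
 * The guest may not share the host's byte order. These helpers convert a
 * host-endian value into the byte order recorded in the dump's ELF header
 * (ELFDATA2LSB or ELFDATA2MSB), so every multi-byte field written to the
 * vmcore is what the guest-side tools expect.
 */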
static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}

typedef struct DumpState {
    GuestPhysBlockList guest_phys_blocks;
    ArchDumpInfo dump_info;
    MemoryMappingList list;
    uint16_t phdr_num;
    uint32_t sh_info;
    bool have_section;
    bool resume;
    ssize_t note_size;
    hwaddr memory_offset;
    int fd;

    GuestPhysBlock *next_block;
    ram_addr_t start;
    bool has_filter;
    int64_t begin;
    int64_t length;
    uint8_t *note_buf;          /* buffer for notes */
    size_t note_buf_offset;     /* current write position in note_buf */
    uint32_t nr_cpus;           /* number of guest CPUs */
    size_t page_size;           /* guest's page size */
    uint64_t max_mapnr;         /* highest guest physical page frame number */
    size_t len_dump_bitmap;     /* size of the area used to store
                                   dump_bitmap in vmcore */
    off_t offset_dump_bitmap;   /* offset of dump_bitmap part in vmcore */
    off_t offset_page;          /* offset of page part in vmcore */
    size_t num_dumpable;        /* number of pages that can be dumped */
    uint32_t flag_compress;     /* indicate the compression format */
} DumpState;

static int dump_cleanup(DumpState *s)
{
    int ret = 0;

    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    if (s->fd != -1) {
        close(s->fd);
    }
    if (s->resume) {
        vm_start();
    }

    return ret;
}

static void dump_error(DumpState *s, const char *reason)
{
    dump_cleanup(s);
}

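/*
 * fd_write_vmcore matches the WriteCoreDumpFunction signature, so it can be
 * handed to the cpu_write_elf*_note() helpers as well as called directly.
 */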
static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static int write_elf64_header(DumpState *s)
{
    Elf64_Ehdr elf_header;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
                                                   endian);
    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
                                                     endian);
    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
    if (s->have_section) {
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
                                                         endian);
        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header.\n");
        return -1;
    }

    return 0;
}

static int write_elf32_header(DumpState *s)
{
    Elf32_Ehdr elf_header;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
                                                   endian);
    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
                                                     endian);
    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
    if (s->have_section) {
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
                                                         endian);
        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header.\n");
        return -1;
    }

    return 0;
}

static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf64_Phdr phdr;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target64(offset, endian);
    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
    phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf32_Phdr phdr;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target32(offset, endian);
    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
    phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static int write_elf64_note(DumpState *s)
{
    Elf64_Phdr phdr;
    int endian = s->dump_info.d_endian;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
    phdr.p_offset = cpu_convert_to_target64(begin, endian);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_convert_to_target64(s->note_size, endian);
    phdr.p_memsz = cpu_convert_to_target64(s->note_size, endian);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

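/*
 * Per-CPU note numbering in the dump is 1-based (what crash/gdb expect),
 * while QEMU's cpu_index is 0-based; shift by one here.
 */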
static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}

static int write_elf64_notes(WriteCoreDumpFunction f, DumpState *s)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes.\n");
            return -1;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
            return -1;
        }
    }

    return 0;
}

static int write_elf32_note(DumpState *s)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int endian = s->dump_info.d_endian;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
    phdr.p_offset = cpu_convert_to_target32(begin, endian);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_convert_to_target32(s->note_size, endian);
    phdr.p_memsz = cpu_convert_to_target32(s->note_size, endian);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static int write_elf32_notes(WriteCoreDumpFunction f, DumpState *s)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes.\n");
            return -1;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
            return -1;
        }
    }

    return 0;
}

static int write_elf_section(DumpState *s, int type)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int endian = s->dump_info.d_endian;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
        shdr = &shdr64;
    }

    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write section header table.\n");
        return -1;
    }

    return 0;
}

static int write_data(DumpState *s, void *buf, int length)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to save memory.\n");
        return -1;
    }

    return 0;
}

/* write the memory to vmcore. 1 page per I/O. */
static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                        int64_t size)
{
    int64_t i;
    int ret;

    for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
                         TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
        }
    }

    if ((size % TARGET_PAGE_SIZE) != 0) {
        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
                         size % TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return;
        }
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }
}

static int write_elf_loads(DumpState *s)
{
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    int ret;
    uint32_t max_index;

    if (s->have_section) {
        max_index = s->sh_info;
    } else {
        max_index = s->phdr_num;
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
                                   filesz);
        } else {
            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
                                   filesz);
        }

        if (ret < 0) {
            return -1;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }

    return 0;
}

/* write elf header, PT_NOTE and elf note to vmcore. */
static int dump_begin(DumpState *s)
{
    int ret;

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write elf note into
     * vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
        ret = write_elf64_header(s);
    } else {
        ret = write_elf32_header(s);
    }
    if (ret < 0) {
        return -1;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
        if (write_elf64_note(s) < 0) {
            return -1;
        }

        /* write all PT_LOAD to vmcore */
        if (write_elf_loads(s) < 0) {
            return -1;
        }

        /* write section to vmcore */
        if (s->have_section) {
            if (write_elf_section(s, 1) < 0) {
                return -1;
            }
        }

        /* write notes to vmcore */
        if (write_elf64_notes(fd_write_vmcore, s) < 0) {
            return -1;
        }

    } else {
        /* write PT_NOTE to vmcore */
        if (write_elf32_note(s) < 0) {
            return -1;
        }

        /* write all PT_LOAD to vmcore */
        if (write_elf_loads(s) < 0) {
            return -1;
        }

        /* write section to vmcore */
        if (s->have_section) {
            if (write_elf_section(s, 0) < 0) {
                return -1;
            }
        }

        /* write notes to vmcore */
        if (write_elf32_notes(fd_write_vmcore, s) < 0) {
            return -1;
        }
    }

    return 0;
}

/* dump is completed; clean up and resume the guest if it was running */
static int dump_completed(DumpState *s)
{
    dump_cleanup(s);
    return 0;
}

static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
        if (!block) {
            /* no more blocks */
            return 1;
        }

        s->start = 0;
        s->next_block = block;
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

        return 0;
    }
}

/* write all memory to vmcore */
static int dump_iterate(DumpState *s)
{
    GuestPhysBlock *block;
    int64_t size;
    int ret;

    while (1) {
        block = s->next_block;

        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        ret = write_memory(s, block, s->start, size);
        if (ret == -1) {
            return ret;
        }

        ret = get_next_block(s, block);
        if (ret == 1) {
            dump_completed(s);
            return 0;
        }
    }
}

static int create_vmcore(DumpState *s)
{
    int ret;

    ret = dump_begin(s);
    if (ret < 0) {
        return -1;
    }

    ret = dump_iterate(s);
    if (ret < 0) {
        return -1;
    }

    return 0;
}

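/*
 * Flat-format framing: the stream opens with a MakedumpfileHeader, then each
 * chunk written by write_buffer() is preceded by a MakedumpfileDataHeader
 * giving the big-endian (offset, size) it belongs at, and the stream is
 * terminated by a header whose fields are END_FLAG_FLAT_HEADER. This lets a
 * kdump-compressed file be written to a non-seekable fd; makedumpfile can
 * reassemble it into the regular on-disk layout afterwards.
 */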
static int write_start_flat_header(int fd)
{
    MakedumpfileHeader *mh;
    int ret = 0;

    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
    mh = g_malloc0(MAX_SIZE_MDF_HEADER);

    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));

    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);

    size_t written_size;
    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

    g_free(mh);
    return ret;
}

static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    size_t written_size;
    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    return 0;
}

static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    size_t written_size;
    MakedumpfileDataHeader mdh;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    written_size = qemu_write_full(fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    /* note_buf is not large enough */
    if (s->note_buf_offset + size > s->note_size) {
        return -1;
    }

    memcpy(s->note_buf + s->note_buf_offset, buf, size);

    s->note_buf_offset += size;

    return 0;
}

/* write common header, sub header and elf note to vmcore */
static int create_header32(DumpState *s)
{
    int ret = 0;
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    int endian = s->dump_info.d_endian;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;

    /* write common header, version 6 of the kdump-compressed format */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_convert_to_target32(6, endian);
    block_size = s->page_size;
    dh->block_size = cpu_convert_to_target32(block_size, endian);
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_convert_to_target32(sub_hdr_size, endian);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_convert_to_target32(MIN(s->max_mapnr, UINT_MAX),
                                            endian);
    dh->nr_cpus = cpu_convert_to_target32(s->nr_cpus, endian);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_convert_to_target32(bitmap_blocks, endian);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_convert_to_target32(status, endian);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        dump_error(s, "dump: failed to write disk dump header.\n");
        ret = -1;
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_convert_to_target64(s->max_mapnr, endian);
    kh->phys_base = cpu_convert_to_target32(PHYS_BASE, endian);
    kh->dump_level = cpu_convert_to_target32(DUMP_LEVEL, endian);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_convert_to_target64(offset_note, endian);
    kh->note_size = cpu_convert_to_target32(s->note_size, endian);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        dump_error(s, "dump: failed to write kdump sub header.\n");
        ret = -1;
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    if (write_elf32_notes(buf_write_note, s) < 0) {
        ret = -1;
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        dump_error(s, "dump: failed to write notes.\n");
        ret = -1;
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                            block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);

    return ret;
}

/* write common header, sub header and elf note to vmcore */
static int create_header64(DumpState *s)
{
    int ret = 0;
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    int endian = s->dump_info.d_endian;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;

    /* write common header, version 6 of the kdump-compressed format */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_convert_to_target32(6, endian);
    block_size = s->page_size;
    dh->block_size = cpu_convert_to_target32(block_size, endian);
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_convert_to_target32(sub_hdr_size, endian);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_convert_to_target32(MIN(s->max_mapnr, UINT_MAX),
                                            endian);
    dh->nr_cpus = cpu_convert_to_target32(s->nr_cpus, endian);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_convert_to_target32(bitmap_blocks, endian);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_convert_to_target32(status, endian);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        dump_error(s, "dump: failed to write disk dump header.\n");
        ret = -1;
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_convert_to_target64(s->max_mapnr, endian);
    kh->phys_base = cpu_convert_to_target64(PHYS_BASE, endian);
    kh->dump_level = cpu_convert_to_target32(DUMP_LEVEL, endian);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_convert_to_target64(offset_note, endian);
    kh->note_size = cpu_convert_to_target64(s->note_size, endian);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        dump_error(s, "dump: failed to write kdump sub header.\n");
        ret = -1;
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    if (write_elf64_notes(buf_write_note, s) < 0) {
        ret = -1;
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        dump_error(s, "dump: failed to write notes.\n");
        ret = -1;
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                            block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);

    return ret;
}

static int write_dump_header(DumpState *s)
{
    if (s->dump_info.d_machine == EM_386) {
        return create_header32(s);
    } else {
        return create_header64(s);
    }
}

/*
 * Set dump_bitmap sequentially. Bits before last_pfn must not be rewritten,
 * so to set the very first bit, pass last_pfn == pfn == 0. set_dump_bitmap
 * always leaves the most recently set bit unsynchronized; setting a bit at
 * (last_pfn + PFN_BUFBITMAP) to 0 flushes the buffered content into the
 * vmcore, i.e. synchronizes the pending bits.
 */
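/*
 * A worked example, assuming BUFSIZE_BITMAP is 4 KiB (so PFN_BUFBITMAP is
 * 32768 bits): setting pfn 40000 after last_pfn 100 first flushes the buffer
 * covering pfns 0..32767 into both bitmaps in the vmcore, then sets bit
 * 40000 % 32768 = 7232 in the freshly cleared buffer.
 */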
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t old_offset, new_offset;
    off_t offset_bitmap1, offset_bitmap2;
    uint32_t byte, bit;

    /* should not set the previous place */
    assert(last_pfn <= pfn);

    /*
     * if the bit needed to be set is not cached in buf, flush the data in buf
     * to vmcore first.
     * making new_offset bigger than old_offset also syncs remaining data
     * into vmcore.
     */
    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);

    while (old_offset < new_offset) {
        /* calculate the offset and write dump_bitmap */
        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
        if (write_buffer(s->fd, offset_bitmap1, buf,
                         BUFSIZE_BITMAP) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so 1st and 2nd bitmap are the same */
        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
                         old_offset;
        if (write_buffer(s->fd, offset_bitmap2, buf,
                         BUFSIZE_BITMAP) < 0) {
            return -1;
        }

        memset(buf, 0, BUFSIZE_BITMAP);
        old_offset += BUFSIZE_BITMAP;
    }

    /* get the exact place of the bit in the buf, and set it */
    byte = (pfn % PFN_BUFBITMAP) / CHAR_BIT;
    bit = (pfn % PFN_BUFBITMAP) % CHAR_BIT;
    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

    return 0;
}

/*
 * Examine every page and return the page frame number and the address of the
 * page. bufptr can be NULL. Note: the blocks here are supposed to reflect
 * guest-phys blocks, so block->target_start and block->target_end should be
 * integral multiples of the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *block = *blockptr;
    hwaddr addr;
    uint8_t *buf;

    /* block == NULL means the start of the iteration */
    if (!block) {
        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        *blockptr = block;
        assert(block->target_start % s->page_size == 0);
        assert(block->target_end % s->page_size == 0);
        *pfnptr = paddr_to_pfn(block->target_start);
        if (bufptr) {
            *bufptr = block->host_addr;
        }
        return true;
    }

    *pfnptr = *pfnptr + 1;
    addr = pfn_to_paddr(*pfnptr);

    if ((addr >= block->target_start) &&
        (addr + s->page_size <= block->target_end)) {
        buf = block->host_addr + (addr - block->target_start);
    } else {
        /* the next page is in the next block */
        block = QTAILQ_NEXT(block, next);
        *blockptr = block;
        if (!block) {
            return false;
        }
        assert(block->target_start % s->page_size == 0);
        assert(block->target_end % s->page_size == 0);
        *pfnptr = paddr_to_pfn(block->target_start);
        buf = block->host_addr;
    }

    if (bufptr) {
        *bufptr = buf;
    }

    return true;
}

static int write_dump_bitmap(DumpState *s)
{
    int ret = 0;
    uint64_t last_pfn, pfn;
    void *dump_bitmap_buf;
    size_t num_dumpable;
    GuestPhysBlock *block_iter = NULL;

    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);

    num_dumpable = 0;
    last_pfn = 0;

    /*
     * examine memory page by page, and set the bit in dump_bitmap
     * corresponding to each existing page.
     */
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to set dump_bitmap.\n");
            ret = -1;
            goto out;
        }

        last_pfn = pfn;
        num_dumpable++;
    }

    /*
     * set_dump_bitmap will always leave the recently set bit unsynchronized.
     * Here we set the bit at last_pfn + PFN_BUFBITMAP to 0, so that the set
     * but unsynchronized bits are flushed into the vmcore.
     */
    if (num_dumpable > 0) {
        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to sync dump_bitmap.\n");
            ret = -1;
            goto out;
        }
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = num_dumpable;

out:
    g_free(dump_bitmap_buf);

    return ret;
}

static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    data_cache->fd = s->fd;
    data_cache->data_size = 0;
    data_cache->buf_size = BUFSIZE_DATA_CACHE;
    data_cache->buf = g_malloc0(BUFSIZE_DATA_CACHE);
    data_cache->offset = offset;
}

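/*
 * Callers stream data through write_cache(dc, buf, size, false) and finish
 * with write_cache(dc, NULL, 0, true), which flushes whatever is still
 * buffered (see write_dump_pages() below).
 */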
static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    /*
     * dc->buf_size should not be less than size, otherwise dc will never
     * have enough room
     */
    assert(size <= dc->buf_size);

    /*
     * if flag_sync is set, synchronize data in dc->buf into vmcore.
     * otherwise check if the space is enough for caching data in buf, if not,
     * write the data in dc->buf to dc->fd and reset dc->buf
     */
    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
        (flag_sync && dc->data_size > 0)) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (!flag_sync) {
        memcpy(dc->buf + dc->data_size, buf, size);
        dc->data_size += size;
    }

    return 0;
}

static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
}

static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
    size_t len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy;
    size_t len_buf_out;

    /* init buf_out */
    len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0;

    /* buf size for zlib */
    len_buf_out_zlib = compressBound(page_size);

    /* buf size for lzo */
#ifdef CONFIG_LZO
    if (flag_compress & DUMP_DH_COMPRESSED_LZO) {
        if (lzo_init() != LZO_E_OK) {
            /* return 0 to indicate lzo is unavailable */
            return 0;
        }
    }

    /*
     * LZO will expand incompressible data by a little amount. Please check
     * the following URL to see the expansion calculation:
     * http://www.oberhumer.com/opensource/lzo/lzofaq.php
     */
    len_buf_out_lzo = page_size + page_size / 16 + 64 + 3;
#endif

#ifdef CONFIG_SNAPPY
    /* buf size for snappy */
    len_buf_out_snappy = snappy_max_compressed_length(page_size);
#endif

    /* get the biggest size, which can hold any kind of compressed page */
    len_buf_out = MAX(len_buf_out_zlib,
                      MAX(len_buf_out_lzo, len_buf_out_snappy));

    return len_buf_out;
}

/*
 * check if the page is all 0
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    return buffer_is_zero(buf, page_size);
}

static int write_dump_pages(DumpState *s)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    int endian = s->dump_info.d_endian;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(s->page_size, s->flag_compress);
    if (len_buf_out == 0) {
        dump_error(s, "dump: failed to get length of output buffer.\n");
        ret = -1;
        goto out;
    }

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * init zero page's page_desc and page_data, because every zero page
     * uses the same page_data
     */
    pd_zero.size = cpu_convert_to_target32(s->page_size, endian);
    pd_zero.flags = cpu_convert_to_target32(0, endian);
    pd_zero.offset = cpu_convert_to_target64(offset_data, endian);
    pd_zero.page_flags = cpu_convert_to_target64(0, endian);
    buf = g_malloc0(s->page_size);
    ret = write_cache(&page_data, buf, s->page_size, false);
    g_free(buf);
    if (ret < 0) {
        dump_error(s, "dump: failed to write page data (zero page).\n");
        goto out;
    }

    offset_data += s->page_size;

    /*
     * dump memory to vmcore page by page. all zero pages share the single
     * copy of page data written first in the page section
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        /* check zero page */
        if (is_zero_page(buf, s->page_size)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc.\n");
                goto out;
            }
        } else {
            /*
             * not a zero page, then:
             * 1. compress the page
             * 2. write the compressed page into the cache of page_data
             * 3. get page desc of the compressed page and write it into the
             *    cache of page_desc
             *
             * only one compression format is used here, since
             * s->flag_compress is set. But if compression fails to shrink
             * the page, we fall back to storing it uncompressed.
             */
            size_out = len_buf_out;
            if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
                (compress2(buf_out, (uLongf *)&size_out, buf, s->page_size,
                           Z_BEST_SPEED) == Z_OK) &&
                (size_out < s->page_size)) {
                pd.flags = cpu_convert_to_target32(DUMP_DH_COMPRESSED_ZLIB,
                                                   endian);
                pd.size = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
#ifdef CONFIG_LZO
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
                       (lzo1x_1_compress(buf, s->page_size, buf_out,
                        (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
                       (size_out < s->page_size)) {
                pd.flags = cpu_convert_to_target32(DUMP_DH_COMPRESSED_LZO,
                                                   endian);
                pd.size = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
#endif
#ifdef CONFIG_SNAPPY
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
                       (snappy_compress((char *)buf, s->page_size,
                        (char *)buf_out, &size_out) == SNAPPY_OK) &&
                       (size_out < s->page_size)) {
                pd.flags = cpu_convert_to_target32(
                                        DUMP_DH_COMPRESSED_SNAPPY, endian);
                pd.size = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
#endif
            } else {
                /*
                 * fall back to storing the page uncompressed; size_out must
                 * be set to s->page_size
                 */
                pd.flags = cpu_convert_to_target32(0, endian);
                size_out = s->page_size;
                pd.size = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf, s->page_size, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
            }

            /* get and write page desc here */
            pd.page_flags = cpu_convert_to_target64(0, endian);
            pd.offset = cpu_convert_to_target64(offset_data, endian);
            offset_data += size_out;

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc.\n");
                goto out;
            }
        }
    }

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_desc.\n");
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_data.\n");
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);

#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif

    g_free(buf_out);

    return ret;
}

static int create_kdump_vmcore(DumpState *s)
{
    int ret;

    /*
     * the kdump-compressed format is:
     *                                               File offset
     *  +------------------------------------------+ 0x0
     *  |    main header (struct disk_dump_header) |
     *  |------------------------------------------+ block 1
     *  |    sub header (struct kdump_sub_header)  |
     *  |------------------------------------------+ block 2
     *  |            1st-dump_bitmap               |
     *  |------------------------------------------+ block 2 + X blocks
     *  |            2nd-dump_bitmap               | (aligned by block)
     *  |------------------------------------------+ block 2 + 2 * X blocks
     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
     *  |  page desc for pfn 1 (struct page_desc)  |
     *  |                    :                     |
     *  |------------------------------------------| (not aligned by block)
     *  |         page data (pfn 0)                |
     *  |         page data (pfn 1)                |
     *  |                    :                     |
     *  +------------------------------------------+
     */

    ret = write_start_flat_header(s->fd);
    if (ret < 0) {
        dump_error(s, "dump: failed to write start flat header.\n");
        return -1;
    }

    ret = write_dump_header(s);
    if (ret < 0) {
        return -1;
    }

    ret = write_dump_bitmap(s);
    if (ret < 0) {
        return -1;
    }

    ret = write_dump_pages(s);
    if (ret < 0) {
        return -1;
    }

    ret = write_end_flat_header(s->fd);
    if (ret < 0) {
        dump_error(s, "dump: failed to write end flat header.\n");
        return -1;
    }

    dump_completed(s);

    return 0;
}

static ram_addr_t get_start_block(DumpState *s)
{
    GuestPhysBlock *block;

    if (!s->has_filter) {
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        s->next_block = block;
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        } else {
            s->start = 0;
        }
        return s->start;
    }

    return -1;
}

static void get_max_mapnr(DumpState *s)
{
    GuestPhysBlock *last_block;

    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head, GuestPhysBlockHead);
    s->max_mapnr = paddr_to_pfn(last_block->target_end);
}
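
/*
 * max_mapnr is derived from the end of the last block, so it is the PFN one
 * past the last guest page; dh->max_mapnr may truncate it to 32 bits (see
 * create_header32() above).
 */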

static int dump_init(DumpState *s, int fd, bool has_format,
                     DumpGuestMemoryFormat format, bool paging, bool has_filter,
                     int64_t begin, int64_t length, Error **errp)
{
    CPUState *cpu;
    int nr_cpus;
    Error *err = NULL;
    int ret;

    /* kdump-compressed conflicts with paging and filter */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        assert(!paging && !has_filter);
    }

    if (runstate_is_running()) {
        vm_stop(RUN_STATE_SAVE_VM);
        s->resume = true;
    } else {
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    CPU_FOREACH(cpu) {
        nr_cpus++;
    }

    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     */
    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
    if (ret < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                     s->dump_info.d_machine, nr_cpus);
    if (s->note_size < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    /* get memory mapping */
    memory_mapping_list_init(&s->list);
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    } else {
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    }

    s->nr_cpus = nr_cpus;
    s->page_size = TARGET_PAGE_SIZE;

    get_max_mapnr(s);

    uint64_t tmp;
    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT), s->page_size);
    s->len_dump_bitmap = tmp * s->page_size;

    /* init for kdump-compressed format */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        switch (format) {
        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
            break;

        default:
            s->flag_compress = 0;
        }

        return 0;
    }

    if (s->has_filter) {
        memory_mapping_filter(&s->list, s->begin, s->length);
    }

    /*
     * calculate phdr_num
     *
     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
     */
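    /*
     * Per the ELF gABI, when there are PN_XNUM (0xffff) or more program
     * headers, e_phnum is set to PN_XNUM and the real count is stored in the
     * sh_info field of section header 0; that is what have_section/sh_info
     * track below.
     */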
    s->phdr_num = 1; /* PT_NOTE */
    if (s->list.num < UINT16_MAX - 2) {
        s->phdr_num += s->list.num;
        s->have_section = false;
    } else {
        s->have_section = true;
        s->phdr_num = PN_XNUM;
        s->sh_info = 1; /* PT_NOTE */

        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
        if (s->list.num <= UINT32_MAX - 1) {
            s->sh_info += s->list.num;
        } else {
            s->sh_info = UINT32_MAX;
        }
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->sh_info +
                               sizeof(Elf64_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
        }
    } else {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->sh_info +
                               sizeof(Elf32_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
        }
    }

    return 0;

cleanup:
    guest_phys_blocks_free(&s->guest_phys_blocks);

    if (s->resume) {
        vm_start();
    }

    return -1;
}

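/*
 * Example QMP usage (illustrative):
 *   { "execute": "dump-guest-memory",
 *     "arguments": { "paging": false, "protocol": "file:/tmp/vmcore",
 *                    "format": "kdump-zlib" } }
 */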
void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
                           int64_t begin, bool has_length,
                           int64_t length, bool has_format,
                           DumpGuestMemoryFormat format, Error **errp)
{
    const char *p;
    int fd = -1;
    DumpState *s;
    int ret;

    /*
     * the kdump-compressed format needs the whole memory dumped, so paging
     * or filter is not supported here.
     */
    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
        (paging || has_begin || has_length)) {
        error_setg(errp, "kdump-compressed format doesn't support paging or "
                         "filter");
        return;
    }
    if (has_begin && !has_length) {
        error_set(errp, QERR_MISSING_PARAMETER, "length");
        return;
    }
    if (!has_begin && has_length) {
        error_set(errp, QERR_MISSING_PARAMETER, "begin");
        return;
    }

    /* check whether lzo/snappy is supported */
#ifndef CONFIG_LZO
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
        error_setg(errp, "kdump-lzo is not available now");
        return;
    }
#endif

#ifndef CONFIG_SNAPPY
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
        error_setg(errp, "kdump-snappy is not available now");
        return;
    }
#endif

#if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
        fd = monitor_get_fd(cur_mon, p, errp);
        if (fd == -1) {
            return;
        }
    }
#endif

    if (strstart(file, "file:", &p)) {
        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
            error_setg_file_open(errp, errno, p);
            return;
        }
    }

    if (fd == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "protocol");
        return;
    }

    s = g_malloc0(sizeof(DumpState));

    ret = dump_init(s, fd, has_format, format, paging, has_begin,
                    begin, length, errp);
    if (ret < 0) {
        g_free(s);
        return;
    }

    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        if (create_kdump_vmcore(s) < 0) {
            error_set(errp, QERR_IO_ERROR);
        }
    } else {
        if (create_vmcore(s) < 0) {
            error_set(errp, QERR_IO_ERROR);
        }
    }

    g_free(s);
}

DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
{
    DumpGuestMemoryFormatList *item;
    DumpGuestMemoryCapability *cap =
                                  g_malloc0(sizeof(DumpGuestMemoryCapability));

    /* elf is always available */
    item = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    cap->formats = item;
    item->value = DUMP_GUEST_MEMORY_FORMAT_ELF;

    /* kdump-zlib is always available */
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB;

    /* add new item if kdump-lzo is available */
#ifdef CONFIG_LZO
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO;
#endif

    /* add new item if kdump-snappy is available */
#ifdef CONFIG_SNAPPY
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY;
#endif

    return cap;
}