From: Anthony Liguori Date: Fri, 3 May 2013 16:20:15 +0000 (-0500) Subject: Merge remote-tracking branch 'stefanha/tracing' into staging X-Git-Tag: v1.5.0-rc0~31 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=743bddb4b35ceaaf6f95aea581a4130dcae6205a;hp=e64dd5efb2c6d522a3bc9d096cd49a4e53f0ae10;p=qemu.git Merge remote-tracking branch 'stefanha/tracing' into staging # By Eiichi Tsukata (2) and Kazuya Saito (2) # Via Stefan Hajnoczi * stefanha/tracing: trace: document ftrace backend trace: Add ftrace tracing backend kvm-all: add kvm_run_exit tracepoint kvm-all: add kvm_ioctl, kvm_vm_ioctl, kvm_vcpu_ioctl tracepoints Message-id: 1367582485-15579-1-git-send-email-stefanha@redhat.com Signed-off-by: Anthony Liguori --- diff --git a/block/Makefile.objs b/block/Makefile.objs index 6c4b5bc6b..5f0358a3d 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o +block-obj-y += vhdx.o block-obj-y += parallels.o blkdebug.o blkverify.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/vhdx.c b/block/vhdx.c new file mode 100644 index 000000000..e9704b1fd --- /dev/null +++ b/block/vhdx.c @@ -0,0 +1,972 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v0.95", published 4/12/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=29681 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qemu/crc32c.h" +#include "block/vhdx.h" + + +/* Several metadata and region table data entries are identified by + * guids in a MS-specific GUID format. */ + + +/* ------- Known Region Table GUIDs ---------------------- */ +static const MSGUID bat_guid = { .data1 = 0x2dc27766, + .data2 = 0xf623, + .data3 = 0x4200, + .data4 = { 0x9d, 0x64, 0x11, 0x5e, + 0x9b, 0xfd, 0x4a, 0x08} }; + +static const MSGUID metadata_guid = { .data1 = 0x8b7ca206, + .data2 = 0x4790, + .data3 = 0x4b9a, + .data4 = { 0xb8, 0xfe, 0x57, 0x5f, + 0x05, 0x0f, 0x88, 0x6e} }; + + + +/* ------- Known Metadata Entry GUIDs ---------------------- */ +static const MSGUID file_param_guid = { .data1 = 0xcaa16737, + .data2 = 0xfa36, + .data3 = 0x4d43, + .data4 = { 0xb3, 0xb6, 0x33, 0xf0, + 0xaa, 0x44, 0xe7, 0x6b} }; + +static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224, + .data2 = 0xcd1b, + .data3 = 0x4876, + .data4 = { 0xb2, 0x11, 0x5d, 0xbe, + 0xd8, 0x3b, 0xf4, 0xb8} }; + +static const MSGUID page83_guid = { .data1 = 0xbeca12ab, + .data2 = 0xb2e6, + .data3 = 0x4523, + .data4 = { 0x93, 0xef, 0xc3, 0x09, + 0xe0, 0x00, 0xc7, 0x46} }; + + +static const MSGUID phys_sector_guid = { .data1 = 0xcda348c7, + .data2 = 0x445d, + .data3 = 0x4471, + .data4 = { 0x9c, 0xc9, 0xe9, 0x88, + 0x52, 0x51, 0xc5, 0x56} }; + +static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d, + .data2 = 0xb30b, + .data3 = 0x454d, + .data4 = { 0xab, 0xf7, 0xd3, + 0xd8, 0x48, 0x34, + 0xab, 0x0c} }; + +static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d, + .data2 = 0xa96f, + .data3 = 0x4709, + .data4 = { 0xba, 0x47, 0xf2, + 0x33, 0xa8, 0xfa, + 0xab, 0x5f} }; + +/* Each parent type must have a valid GUID; this is for parent images + * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would + * need to make up our own QCOW2 GUID type */ +static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7, + .data2 = 0xd19e, + .data3 = 0x4a81, + .data4 = { 0xb7, 0x89, 0x25, 0xb8, + 0xe9, 0x44, 0x59, 0x13} }; + + +#define META_FILE_PARAMETER_PRESENT 0x01 +#define META_VIRTUAL_DISK_SIZE_PRESENT 0x02 +#define META_PAGE_83_PRESENT 0x04 +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08 +#define META_PHYS_SECTOR_SIZE_PRESENT 0x10 +#define META_PARENT_LOCATOR_PRESENT 0x20 + +#define META_ALL_PRESENT \ + (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \ + META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \ + META_PHYS_SECTOR_SIZE_PRESENT) + +typedef struct VHDXMetadataEntries { + VHDXMetadataTableEntry file_parameters_entry; + VHDXMetadataTableEntry virtual_disk_size_entry; + VHDXMetadataTableEntry page83_data_entry; + VHDXMetadataTableEntry logical_sector_size_entry; + VHDXMetadataTableEntry phys_sector_size_entry; + VHDXMetadataTableEntry parent_locator_entry; + uint16_t present; +} VHDXMetadataEntries; + + +typedef struct VHDXSectorInfo { + uint32_t bat_idx; /* BAT entry index */ + uint32_t sectors_avail; /* sectors available in payload block */ + uint32_t bytes_left; /* bytes left in the block after data to r/w */ + uint32_t bytes_avail; /* bytes available in payload block */ + uint64_t file_offset; /* absolute offset in bytes, in file */ + uint64_t block_offset; /* block offset, in bytes */ +} VHDXSectorInfo; + + + +typedef struct BDRVVHDXState { + CoMutex lock; + + int curr_header; + VHDXHeader *headers[2]; + + VHDXRegionTableHeader rt; + VHDXRegionTableEntry bat_rt; /* region table for the BAT */ + VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ + + VHDXMetadataTableHeader metadata_hdr; + VHDXMetadataEntries metadata_entries; + + VHDXFileParameters params; + uint32_t block_size; + uint32_t block_size_bits; + uint32_t sectors_per_block; + uint32_t sectors_per_block_bits; + + uint64_t virtual_disk_size; + uint32_t logical_sector_size; + uint32_t physical_sector_size; + + uint64_t chunk_ratio; + uint32_t chunk_ratio_bits; + uint32_t logical_sector_size_bits; + + uint32_t bat_entries; + VHDXBatEntry *bat; + uint64_t bat_offset; + + VHDXParentLocatorHeader parent_header; + VHDXParentLocatorEntry *parent_entries; + +} BDRVVHDXState; + +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, + int crc_offset) +{ + uint32_t crc_new; + uint32_t crc_orig; + assert(buf != NULL); + + if (crc_offset > 0) { + memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig)); + memset(buf + crc_offset, 0, sizeof(crc_orig)); + } + + crc_new = crc32c(crc, buf, size); + if (crc_offset > 0) { + memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig)); + } + + return crc_new; +} + +/* Validates the checksum of the buffer, with an in-place CRC. + * + * Zero is substituted during crc calculation for the original crc field, + * and the crc field is restored afterwards. But the buffer will be modifed + * during the calculation, so this may not be not suitable for multi-threaded + * use. + * + * crc_offset: byte offset in buf of the buffer crc + * buf: buffer pointer + * size: size of buffer (must be > crc_offset+4) + * + * returns true if checksum is valid, false otherwise + */ +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) +{ + uint32_t crc_orig; + uint32_t crc; + + assert(buf != NULL); + assert(size > (crc_offset + 4)); + + memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig)); + crc_orig = le32_to_cpu(crc_orig); + + crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset); + + return crc == crc_orig; +} + + +/* + * Per the MS VHDX Specification, for every VHDX file: + * - The header section is fixed size - 1 MB + * - The header section is always the first "object" + * - The first 64KB of the header is the File Identifier + * - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile") + * - The following 512 bytes constitute a UTF-16 string identifiying the + * software that created the file, and is optional and diagnostic only. + * + * Therefore, we probe by looking for the vhdxfile signature "vhdxfile" + */ +static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) { + return 100; + } + return 0; +} + +/* All VHDX structures on disk are little endian */ +static void vhdx_header_le_import(VHDXHeader *h) +{ + assert(h != NULL); + + le32_to_cpus(&h->signature); + le32_to_cpus(&h->checksum); + le64_to_cpus(&h->sequence_number); + + leguid_to_cpus(&h->file_write_guid); + leguid_to_cpus(&h->data_write_guid); + leguid_to_cpus(&h->log_guid); + + le16_to_cpus(&h->log_version); + le16_to_cpus(&h->version); + le32_to_cpus(&h->log_length); + le64_to_cpus(&h->log_offset); +} + + +/* opens the specified header block from the VHDX file header section */ +static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + VHDXHeader *header1; + VHDXHeader *header2; + bool h1_valid = false; + bool h2_valid = false; + uint64_t h1_seq = 0; + uint64_t h2_seq = 0; + uint8_t *buffer; + + header1 = qemu_blockalign(bs, sizeof(VHDXHeader)); + header2 = qemu_blockalign(bs, sizeof(VHDXHeader)); + + buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE); + + s->headers[0] = header1; + s->headers[1] = header2; + + /* We have to read the whole VHDX_HEADER_SIZE instead of + * sizeof(VHDXHeader), because the checksum is over the whole + * region */ + ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE); + if (ret < 0) { + goto fail; + } + /* copy over just the relevant portion that we need */ + memcpy(header1, buffer, sizeof(VHDXHeader)); + vhdx_header_le_import(header1); + + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) && + !memcmp(&header1->signature, "head", 4) && + header1->version == 1) { + h1_seq = header1->sequence_number; + h1_valid = true; + } + + ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE); + if (ret < 0) { + goto fail; + } + /* copy over just the relevant portion that we need */ + memcpy(header2, buffer, sizeof(VHDXHeader)); + vhdx_header_le_import(header2); + + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) && + !memcmp(&header2->signature, "head", 4) && + header2->version == 1) { + h2_seq = header2->sequence_number; + h2_valid = true; + } + + /* If there is only 1 valid header (or no valid headers), we + * don't care what the sequence numbers are */ + if (h1_valid && !h2_valid) { + s->curr_header = 0; + } else if (!h1_valid && h2_valid) { + s->curr_header = 1; + } else if (!h1_valid && !h2_valid) { + ret = -EINVAL; + goto fail; + } else { + /* If both headers are valid, then we choose the active one by the + * highest sequence number. If the sequence numbers are equal, that is + * invalid */ + if (h1_seq > h2_seq) { + s->curr_header = 0; + } else if (h2_seq > h1_seq) { + s->curr_header = 1; + } else { + ret = -EINVAL; + goto fail; + } + } + + ret = 0; + + goto exit; + +fail: + qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found"); + qemu_vfree(header1); + qemu_vfree(header2); + s->headers[0] = NULL; + s->headers[1] = NULL; +exit: + qemu_vfree(buffer); + return ret; +} + + +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + uint8_t *buffer; + int offset = 0; + VHDXRegionTableEntry rt_entry; + uint32_t i; + bool bat_rt_found = false; + bool metadata_rt_found = false; + + /* We have to read the whole 64KB block, because the crc32 is over the + * whole block */ + buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE); + + ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer, + VHDX_HEADER_BLOCK_SIZE); + if (ret < 0) { + goto fail; + } + memcpy(&s->rt, buffer, sizeof(s->rt)); + le32_to_cpus(&s->rt.signature); + le32_to_cpus(&s->rt.checksum); + le32_to_cpus(&s->rt.entry_count); + le32_to_cpus(&s->rt.reserved); + offset += sizeof(s->rt); + + if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) || + memcmp(&s->rt.signature, "regi", 4)) { + ret = -EINVAL; + goto fail; + } + + /* Per spec, maximum region table entry count is 2047 */ + if (s->rt.entry_count > 2047) { + ret = -EINVAL; + goto fail; + } + + for (i = 0; i < s->rt.entry_count; i++) { + memcpy(&rt_entry, buffer + offset, sizeof(rt_entry)); + offset += sizeof(rt_entry); + + leguid_to_cpus(&rt_entry.guid); + le64_to_cpus(&rt_entry.file_offset); + le32_to_cpus(&rt_entry.length); + le32_to_cpus(&rt_entry.data_bits); + + /* see if we recognize the entry */ + if (guid_eq(rt_entry.guid, bat_guid)) { + /* must be unique; if we have already found it this is invalid */ + if (bat_rt_found) { + ret = -EINVAL; + goto fail; + } + bat_rt_found = true; + s->bat_rt = rt_entry; + continue; + } + + if (guid_eq(rt_entry.guid, metadata_guid)) { + /* must be unique; if we have already found it this is invalid */ + if (metadata_rt_found) { + ret = -EINVAL; + goto fail; + } + metadata_rt_found = true; + s->metadata_rt = rt_entry; + continue; + } + + if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) { + /* cannot read vhdx file - required region table entry that + * we do not understand. per spec, we must fail to open */ + ret = -ENOTSUP; + goto fail; + } + } + ret = 0; + +fail: + qemu_vfree(buffer); + return ret; +} + + + +/* Metadata initial parser + * + * This loads all the metadata entry fields. This may cause additional + * fields to be processed (e.g. parent locator, etc..). + * + * There are 5 Metadata items that are always required: + * - File Parameters (block size, has a parent) + * - Virtual Disk Size (size, in bytes, of the virtual drive) + * - Page 83 Data (scsi page 83 guid) + * - Logical Sector Size (logical sector size in bytes, either 512 or + * 4096. We only support 512 currently) + * - Physical Sector Size (512 or 4096) + * + * Also, if the File Parameters indicate this is a differencing file, + * we must also look for the Parent Locator metadata item. + */ +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + uint8_t *buffer; + int offset = 0; + uint32_t i = 0; + VHDXMetadataTableEntry md_entry; + + buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE); + + ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer, + VHDX_METADATA_TABLE_MAX_SIZE); + if (ret < 0) { + goto exit; + } + memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr)); + offset += sizeof(s->metadata_hdr); + + le64_to_cpus(&s->metadata_hdr.signature); + le16_to_cpus(&s->metadata_hdr.reserved); + le16_to_cpus(&s->metadata_hdr.entry_count); + + if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) { + ret = -EINVAL; + goto exit; + } + + s->metadata_entries.present = 0; + + if ((s->metadata_hdr.entry_count * sizeof(md_entry)) > + (VHDX_METADATA_TABLE_MAX_SIZE - offset)) { + ret = -EINVAL; + goto exit; + } + + for (i = 0; i < s->metadata_hdr.entry_count; i++) { + memcpy(&md_entry, buffer + offset, sizeof(md_entry)); + offset += sizeof(md_entry); + + leguid_to_cpus(&md_entry.item_id); + le32_to_cpus(&md_entry.offset); + le32_to_cpus(&md_entry.length); + le32_to_cpus(&md_entry.data_bits); + le32_to_cpus(&md_entry.reserved2); + + if (guid_eq(md_entry.item_id, file_param_guid)) { + if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.file_parameters_entry = md_entry; + s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, virtual_size_guid)) { + if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.virtual_disk_size_entry = md_entry; + s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, page83_guid)) { + if (s->metadata_entries.present & META_PAGE_83_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.page83_data_entry = md_entry; + s->metadata_entries.present |= META_PAGE_83_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, logical_sector_guid)) { + if (s->metadata_entries.present & + META_LOGICAL_SECTOR_SIZE_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.logical_sector_size_entry = md_entry; + s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, phys_sector_guid)) { + if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.phys_sector_size_entry = md_entry; + s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT; + continue; + } + + if (guid_eq(md_entry.item_id, parent_locator_guid)) { + if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) { + ret = -EINVAL; + goto exit; + } + s->metadata_entries.parent_locator_entry = md_entry; + s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT; + continue; + } + + if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) { + /* cannot read vhdx file - required region table entry that + * we do not understand. per spec, we must fail to open */ + ret = -ENOTSUP; + goto exit; + } + } + + if (s->metadata_entries.present != META_ALL_PRESENT) { + ret = -ENOTSUP; + goto exit; + } + + ret = bdrv_pread(bs->file, + s->metadata_entries.file_parameters_entry.offset + + s->metadata_rt.file_offset, + &s->params, + sizeof(s->params)); + + if (ret < 0) { + goto exit; + } + + le32_to_cpus(&s->params.block_size); + le32_to_cpus(&s->params.data_bits); + + + /* We now have the file parameters, so we can tell if this is a + * differencing file (i.e.. has_parent), is dynamic or fixed + * sized (leave_blocks_allocated), and the block size */ + + /* The parent locator required iff the file parameters has_parent set */ + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) { + /* TODO: parse parent locator fields */ + ret = -ENOTSUP; /* temp, until differencing files are supported */ + goto exit; + } else { + /* if has_parent is set, but there is not parent locator present, + * then that is an invalid combination */ + ret = -EINVAL; + goto exit; + } + } + + /* determine virtual disk size, logical sector size, + * and phys sector size */ + + ret = bdrv_pread(bs->file, + s->metadata_entries.virtual_disk_size_entry.offset + + s->metadata_rt.file_offset, + &s->virtual_disk_size, + sizeof(uint64_t)); + if (ret < 0) { + goto exit; + } + ret = bdrv_pread(bs->file, + s->metadata_entries.logical_sector_size_entry.offset + + s->metadata_rt.file_offset, + &s->logical_sector_size, + sizeof(uint32_t)); + if (ret < 0) { + goto exit; + } + ret = bdrv_pread(bs->file, + s->metadata_entries.phys_sector_size_entry.offset + + s->metadata_rt.file_offset, + &s->physical_sector_size, + sizeof(uint32_t)); + if (ret < 0) { + goto exit; + } + + le64_to_cpus(&s->virtual_disk_size); + le32_to_cpus(&s->logical_sector_size); + le32_to_cpus(&s->physical_sector_size); + + if (s->logical_sector_size == 0 || s->params.block_size == 0) { + ret = -EINVAL; + goto exit; + } + + /* both block_size and sector_size are guaranteed powers of 2 */ + s->sectors_per_block = s->params.block_size / s->logical_sector_size; + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) * + (uint64_t)s->logical_sector_size / + (uint64_t)s->params.block_size; + + /* These values are ones we will want to use for division / multiplication + * later on, and they are all guaranteed (per the spec) to be powers of 2, + * so we can take advantage of that for shift operations during + * reads/writes */ + if (s->logical_sector_size & (s->logical_sector_size - 1)) { + ret = -EINVAL; + goto exit; + } + if (s->sectors_per_block & (s->sectors_per_block - 1)) { + ret = -EINVAL; + goto exit; + } + if (s->chunk_ratio & (s->chunk_ratio - 1)) { + ret = -EINVAL; + goto exit; + } + s->block_size = s->params.block_size; + if (s->block_size & (s->block_size - 1)) { + ret = -EINVAL; + goto exit; + } + + s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size); + s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block); + s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio); + s->block_size_bits = 31 - clz32(s->block_size); + + ret = 0; + +exit: + qemu_vfree(buffer); + return ret; +} + +/* Parse the replay log. Per the VHDX spec, if the log is present + * it must be replayed prior to opening the file, even read-only. + * + * If read-only, we must replay the log in RAM (or refuse to open + * a dirty VHDX file read-only */ +static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s) +{ + int ret = 0; + int i; + VHDXHeader *hdr; + + hdr = s->headers[s->curr_header]; + + /* either the log guid, or log length is zero, + * then a replay log is present */ + for (i = 0; i < sizeof(hdr->log_guid.data4); i++) { + ret |= hdr->log_guid.data4[i]; + } + if (hdr->log_guid.data1 == 0 && + hdr->log_guid.data2 == 0 && + hdr->log_guid.data3 == 0 && + ret == 0) { + goto exit; + } + + /* per spec, only log version of 0 is supported */ + if (hdr->log_version != 0) { + ret = -EINVAL; + goto exit; + } + + if (hdr->log_length == 0) { + goto exit; + } + + /* We currently do not support images with logs to replay */ + ret = -ENOTSUP; + +exit: + return ret; +} + + +static int vhdx_open(BlockDriverState *bs, QDict *options, int flags) +{ + BDRVVHDXState *s = bs->opaque; + int ret = 0; + uint32_t i; + uint64_t signature; + uint32_t data_blocks_cnt, bitmap_blocks_cnt; + + + s->bat = NULL; + + qemu_co_mutex_init(&s->lock); + + /* validate the file signature */ + ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t)); + if (ret < 0) { + goto fail; + } + if (memcmp(&signature, "vhdxfile", 8)) { + ret = -EINVAL; + goto fail; + } + + ret = vhdx_parse_header(bs, s); + if (ret) { + goto fail; + } + + ret = vhdx_parse_log(bs, s); + if (ret) { + goto fail; + } + + ret = vhdx_open_region_tables(bs, s); + if (ret) { + goto fail; + } + + ret = vhdx_parse_metadata(bs, s); + if (ret) { + goto fail; + } + s->block_size = s->params.block_size; + + /* the VHDX spec dictates that virtual_disk_size is always a multiple of + * logical_sector_size */ + bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits; + + data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits; + if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) { + data_blocks_cnt++; + } + bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits; + if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) { + bitmap_blocks_cnt++; + } + + if (s->parent_entries) { + s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); + } else { + s->bat_entries = data_blocks_cnt + + ((data_blocks_cnt - 1) >> s->chunk_ratio_bits); + } + + s->bat_offset = s->bat_rt.file_offset; + + if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) { + /* BAT allocation is not large enough for all entries */ + ret = -EINVAL; + goto fail; + } + + s->bat = qemu_blockalign(bs, s->bat_rt.length); + + ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < s->bat_entries; i++) { + le64_to_cpus(&s->bat[i]); + } + + if (flags & BDRV_O_RDWR) { + ret = -ENOTSUP; + goto fail; + } + + /* TODO: differencing files, write */ + + return 0; +fail: + qemu_vfree(s->headers[0]); + qemu_vfree(s->headers[1]); + qemu_vfree(s->bat); + qemu_vfree(s->parent_entries); + return ret; +} + +static int vhdx_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + + +/* + * Perform sector to block offset translations, to get various + * sector and file offsets into the image. See VHDXSectorInfo + */ +static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num, + int nb_sectors, VHDXSectorInfo *sinfo) +{ + uint32_t block_offset; + + sinfo->bat_idx = sector_num >> s->sectors_per_block_bits; + /* effectively a modulo - this gives us the offset into the block + * (in sector sizes) for our sector number */ + block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits); + /* the chunk ratio gives us the interleaving of the sector + * bitmaps, so we need to advance our page block index by the + * sector bitmaps entry number */ + sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits; + + /* the number of sectors we can read/write in this cycle */ + sinfo->sectors_avail = s->sectors_per_block - block_offset; + + sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits; + + if (sinfo->sectors_avail > nb_sectors) { + sinfo->sectors_avail = nb_sectors; + } + + sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits; + + sinfo->file_offset = s->bat[sinfo->bat_idx] >> VHDX_BAT_FILE_OFF_BITS; + + sinfo->block_offset = block_offset << s->logical_sector_size_bits; + + /* The file offset must be past the header section, so must be > 0 */ + if (sinfo->file_offset == 0) { + return; + } + + /* block offset is the offset in vhdx logical sectors, in + * the payload data block. Convert that to a byte offset + * in the block, and add in the payload data block offset + * in the file, in bytes, to get the final read address */ + + sinfo->file_offset <<= 20; /* now in bytes, rather than 1MB units */ + sinfo->file_offset += sinfo->block_offset; +} + + + +static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + BDRVVHDXState *s = bs->opaque; + int ret = 0; + VHDXSectorInfo sinfo; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + qemu_co_mutex_lock(&s->lock); + + while (nb_sectors > 0) { + /* We are a differencing file, so we need to inspect the sector bitmap + * to see if we have the data or not */ + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) { + /* not supported yet */ + ret = -ENOTSUP; + goto exit; + } else { + vhdx_block_translate(s, sector_num, nb_sectors, &sinfo); + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, sinfo.bytes_avail); + + /* check the payload block state */ + switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) { + case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */ + case PAYLOAD_BLOCK_UNDEFINED: /* fall through */ + case PAYLOAD_BLOCK_UNMAPPED: /* fall through */ + case PAYLOAD_BLOCK_ZERO: + /* return zero */ + qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail); + break; + case PAYLOAD_BLOCK_FULL_PRESENT: + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_readv(bs->file, + sinfo.file_offset >> BDRV_SECTOR_BITS, + sinfo.sectors_avail, &hd_qiov); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto exit; + } + break; + case PAYLOAD_BLOCK_PARTIALLY_PRESENT: + /* we don't yet support difference files, fall through + * to error */ + default: + ret = -EIO; + goto exit; + break; + } + nb_sectors -= sinfo.sectors_avail; + sector_num += sinfo.sectors_avail; + bytes_done += sinfo.bytes_avail; + } + } + ret = 0; +exit: + qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&hd_qiov); + return ret; +} + + + +static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + return -ENOTSUP; +} + + +static void vhdx_close(BlockDriverState *bs) +{ + BDRVVHDXState *s = bs->opaque; + qemu_vfree(s->headers[0]); + qemu_vfree(s->headers[1]); + qemu_vfree(s->bat); + qemu_vfree(s->parent_entries); +} + +static BlockDriver bdrv_vhdx = { + .format_name = "vhdx", + .instance_size = sizeof(BDRVVHDXState), + .bdrv_probe = vhdx_probe, + .bdrv_open = vhdx_open, + .bdrv_close = vhdx_close, + .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_co_readv = vhdx_co_readv, + .bdrv_co_writev = vhdx_co_writev, +}; + +static void bdrv_vhdx_init(void) +{ + bdrv_register(&bdrv_vhdx); +} + +block_init(bdrv_vhdx_init); diff --git a/block/vhdx.h b/block/vhdx.h new file mode 100644 index 000000000..c3b64c6ff --- /dev/null +++ b/block/vhdx.h @@ -0,0 +1,325 @@ +/* + * Block driver for Hyper-V VHDX Images + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * This is based on the "VHDX Format Specification v0.95", published 4/12/2012 + * by Microsoft: + * https://www.microsoft.com/en-us/download/details.aspx?id=29681 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef BLOCK_VHDX_H +#define BLOCK_VHDX_H + +/* Structures and fields present in the VHDX file */ + +/* The header section has the following blocks, + * each block is 64KB: + * + * _____________________________________________________________________________ + * | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) | + * |----------|---------------|------------|--------------|--------------------| + * | | | | | | + * 0.........64KB...........128KB........192KB..........256KB................1MB + */ + +#define VHDX_HEADER_BLOCK_SIZE (64*1024) + +#define VHDX_FILE_ID_OFFSET 0 +#define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE*1) +#define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE*2) +#define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE*3) + + +/* + * A note on the use of MS-GUID fields. For more details on the GUID, + * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. + * + * The VHDX specification only states that these are MS GUIDs, and which + * bytes are data1-data4. It makes no mention of what algorithm should be used + * to generate the GUID, nor what standard. However, looking at the specified + * known GUID fields, it appears the GUIDs are: + * Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4) + * Random algorithm (noted by 0x4XXX for .data3) + */ + +/* ---- HEADER SECTION STRUCTURES ---- */ + +/* These structures are ones that are defined in the VHDX specification + * document */ + +typedef struct VHDXFileIdentifier { + uint64_t signature; /* "vhdxfile" in ASCII */ + uint16_t creator[256]; /* optional; utf-16 string to identify + the vhdx file creator. Diagnotistic + only */ +} VHDXFileIdentifier; + + +/* the guid is a 16 byte unique ID - the definition for this used by + * Microsoft is not just 16 bytes though - it is a structure that is defined, + * so we need to follow it here so that endianness does not trip us up */ + +typedef struct MSGUID { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MSGUID; + +#define guid_eq(a, b) \ + (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) + +#define VHDX_HEADER_SIZE (4*1024) /* although the vhdx_header struct in disk + is only 582 bytes, for purposes of crc + the header is the first 4KB of the 64KB + block */ + +/* The full header is 4KB, although the actual header data is much smaller. + * But for the checksum calculation, it is over the entire 4KB structure, + * not just the defined portion of it */ +typedef struct QEMU_PACKED VHDXHeader { + uint32_t signature; /* "head" in ASCII */ + uint32_t checksum; /* CRC-32C hash of the whole header */ + uint64_t sequence_number; /* Seq number of this header. Each + VHDX file has 2 of these headers, + and only the header with the highest + sequence number is valid */ + MSGUID file_write_guid; /* 128 bit unique identifier. Must be + updated to new, unique value before + the first modification is made to + file */ + MSGUID data_write_guid; /* 128 bit unique identifier. Must be + updated to new, unique value before + the first modification is made to + visible data. Visbile data is + defined as: + - system & user metadata + - raw block data + - disk size + - any change that will + cause the virtual disk + sector read to differ + + This does not need to change if + blocks are re-arranged */ + MSGUID log_guid; /* 128 bit unique identifier. If zero, + there is no valid log. If non-zero, + log entries with this guid are + valid. */ + uint16_t log_version; /* version of the log format. Mustn't be + zero, unless log_guid is also zero */ + uint16_t version; /* version of th evhdx file. Currently, + only supported version is "1" */ + uint32_t log_length; /* length of the log. Must be multiple + of 1MB */ + uint64_t log_offset; /* byte offset in the file of the log. + Must also be a multiple of 1MB */ +} VHDXHeader; + +/* Header for the region table block */ +typedef struct QEMU_PACKED VHDXRegionTableHeader { + uint32_t signature; /* "regi" in ASCII */ + uint32_t checksum; /* CRC-32C hash of the 64KB table */ + uint32_t entry_count; /* number of valid entries */ + uint32_t reserved; +} VHDXRegionTableHeader; + +/* Individual region table entry. There may be a maximum of 2047 of these + * + * There are two known region table properties. Both are required. + * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08 + * Metadata: 8B7CA20647904B9AB8FE575F050F886E + */ +#define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand + this entry in order to open + file */ +typedef struct QEMU_PACKED VHDXRegionTableEntry { + MSGUID guid; /* 128-bit unique identifier */ + uint64_t file_offset; /* offset of the object in the file. + Must be multiple of 1MB */ + uint32_t length; /* length, in bytes, of the object */ + uint32_t data_bits; +} VHDXRegionTableEntry; + + +/* ---- LOG ENTRY STRUCTURES ---- */ +#define VHDX_LOG_HDR_SIZE 64 +typedef struct QEMU_PACKED VHDXLogEntryHeader { + uint32_t signature; /* "loge" in ASCII */ + uint32_t checksum; /* CRC-32C hash of the 64KB table */ + uint32_t entry_length; /* length in bytes, multiple of 1MB */ + uint32_t tail; /* byte offset of first log entry of a + seq, where this entry is the last + entry */ + uint64_t sequence_number; /* incremented with each log entry. + May not be zero. */ + uint32_t descriptor_count; /* number of descriptors in this log + entry, must be >= 0 */ + uint32_t reserved; + MSGUID log_guid; /* value of the log_guid from + vhdx_header. If not found in + vhdx_header, it is invalid */ + uint64_t flushed_file_offset; /* see spec for full details - this + sould be vhdx file size in bytes */ + uint64_t last_file_offset; /* size in bytes that all allocated + file structures fit into */ +} VHDXLogEntryHeader; + +#define VHDX_LOG_DESC_SIZE 32 + +typedef struct QEMU_PACKED VHDXLogDescriptor { + uint32_t signature; /* "zero" or "desc" in ASCII */ + union { + uint32_t reserved; /* zero desc */ + uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the + data sector */ + }; + union { + uint64_t zero_length; /* zero desc: length of the section to + zero */ + uint64_t leading_bytes; /* data desc: bytes 0-7 of the data + sector */ + }; + uint64_t file_offset; /* file offset to write zeros - multiple + of 4kB */ + uint64_t sequence_number; /* must match same field in + vhdx_log_entry_header */ +} VHDXLogDescriptor; + +typedef struct QEMU_PACKED VHDXLogDataSector { + uint32_t data_signature; /* "data" in ASCII */ + uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */ + uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive). + see the data descriptor field for the + other mising bytes */ + uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */ +} VHDXLogDataSector; + + + +/* block states - different state values depending on whether it is a + * payload block, or a sector block. */ + +#define PAYLOAD_BLOCK_NOT_PRESENT 0 +#define PAYLOAD_BLOCK_UNDEFINED 1 +#define PAYLOAD_BLOCK_ZERO 2 +#define PAYLOAD_BLOCK_UNMAPPED 5 +#define PAYLOAD_BLOCK_FULL_PRESENT 6 +#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 + +#define SB_BLOCK_NOT_PRESENT 0 +#define SB_BLOCK_PRESENT 6 + +/* per the spec */ +#define VHDX_MAX_SECTORS_PER_BLOCK (1<<23) + +/* upper 44 bits are the file offset in 1MB units lower 3 bits are the state + other bits are reserved */ +#define VHDX_BAT_STATE_BIT_MASK 0x07 +#define VHDX_BAT_FILE_OFF_BITS (64-44) +typedef uint64_t VHDXBatEntry; + +/* ---- METADATA REGION STRUCTURES ---- */ + +#define VHDX_METADATA_ENTRY_SIZE 32 +#define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ +#define VHDX_METADATA_TABLE_MAX_SIZE \ + (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) +typedef struct QEMU_PACKED VHDXMetadataTableHeader { + uint64_t signature; /* "metadata" in ASCII */ + uint16_t reserved; + uint16_t entry_count; /* number table entries. <= 2047 */ + uint32_t reserved2[5]; +} VHDXMetadataTableHeader; + +#define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */ +#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set, + otherwise file metdata */ +#define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parse must understand this + entry to open the file */ +typedef struct QEMU_PACKED VHDXMetadataTableEntry { + MSGUID item_id; /* 128-bit identifier for metadata */ + uint32_t offset; /* byte offset of the metadata. At + least 64kB. Relative to start of + metadata region */ + /* note: if length = 0, so is offset */ + uint32_t length; /* length of metadata. <= 1MB. */ + uint32_t data_bits; /* least-significant 3 bits are flags, the + rest are reserved (see above) */ + uint32_t reserved2; +} VHDXMetadataTableEntry; + +#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to + be BLOCK_NOT_PRESENT. + If set indicates a fixed + size VHDX file */ +#define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ +typedef struct QEMU_PACKED VHDXFileParameters { + uint32_t block_size; /* size of each payload block, always + power of 2, <= 256MB and >= 1MB. */ + uint32_t data_bits; /* least-significant 2 bits are flags, the rest + are reserved (see above) */ +} VHDXFileParameters; + +typedef struct QEMU_PACKED VHDXVirtualDiskSize { + uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. + Must be multiple of the sector size, + max of 64TB */ +} VHDXVirtualDiskSize; + +typedef struct QEMU_PACKED VHDXPage83Data { + MSGUID page_83_data[16]; /* unique id for scsi devices that + support page 0x83 */ +} VHDXPage83Data; + +typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize { + uint32_t logical_sector_size; /* virtual disk sector size (in bytes). + Can only be 512 or 4096 bytes */ +} VHDXVirtualDiskLogicalSectorSize; + +typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { + uint32_t physical_sector_size; /* physical sector size (in bytes). + Can only be 512 or 4096 bytes */ +} VHDXVirtualDiskPhysicalSectorSize; + +typedef struct QEMU_PACKED VHDXParentLocatorHeader { + MSGUID locator_type[16]; /* type of the parent virtual disk. */ + uint16_t reserved; + uint16_t key_value_count; /* number of key/value pairs for this + locator */ +} VHDXParentLocatorHeader; + +/* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */ +typedef struct QEMU_PACKED VHDXParentLocatorEntry { + uint32_t key_offset; /* offset in metadata for key, > 0 */ + uint32_t value_offset; /* offset in metadata for value, >0 */ + uint16_t key_length; /* length of entry key, > 0 */ + uint16_t value_length; /* length of entry value, > 0 */ +} VHDXParentLocatorEntry; + + +/* ----- END VHDX SPECIFICATION STRUCTURES ---- */ + + +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, + int crc_offset); + +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); + + +static void leguid_to_cpus(MSGUID *guid) +{ + le32_to_cpus(&guid->data1); + le16_to_cpus(&guid->data2); + le16_to_cpus(&guid->data3); +} + +#endif diff --git a/block/vmdk.c b/block/vmdk.c index 7bad757a3..608daaf93 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -32,11 +32,25 @@ #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') #define VMDK4_COMPRESSION_DEFLATE 1 +#define VMDK4_FLAG_NL_DETECT (1 << 0) #define VMDK4_FLAG_RGD (1 << 1) +/* Zeroed-grain enable bit */ +#define VMDK4_FLAG_ZERO_GRAIN (1 << 2) #define VMDK4_FLAG_COMPRESS (1 << 16) #define VMDK4_FLAG_MARKER (1 << 17) #define VMDK4_GD_AT_END 0xffffffffffffffffULL +#define VMDK_GTE_ZEROED 0x1 + +/* VMDK internal error codes */ +#define VMDK_OK 0 +#define VMDK_ERROR (-1) +/* Cluster not allocated */ +#define VMDK_UNALLOC (-2) +#define VMDK_ZEROED (-3) + +#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" + typedef struct { uint32_t version; uint32_t flags; @@ -73,6 +87,8 @@ typedef struct VmdkExtent { bool flat; bool compressed; bool has_marker; + bool has_zero_grain; + int version; int64_t sectors; int64_t end_sector; int64_t flat_start_offset; @@ -108,6 +124,7 @@ typedef struct VmdkMetaData { unsigned int l2_index; unsigned int l2_offset; int valid; + uint32_t *l2_cache_entry; } VmdkMetaData; typedef struct VmdkGrainMarker { @@ -561,6 +578,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, extent->compressed = le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; + extent->version = le32_to_cpu(header.version); + extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; ret = vmdk_init_tables(bs, extent); if (ret) { /* free extent allocated by vmdk_add_extent */ @@ -578,22 +597,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, opt_pos = strstr(desc, opt_name); if (!opt_pos) { - return -1; + return VMDK_ERROR; } /* Skip "=\"" following opt_name */ opt_pos += strlen(opt_name) + 2; if (opt_pos >= end) { - return -1; + return VMDK_ERROR; } opt_end = opt_pos; while (opt_end < end && *opt_end != '"') { opt_end++; } if (opt_end == end || buf_size < opt_end - opt_pos + 1) { - return -1; + return VMDK_ERROR; } pstrcpy(buf, opt_end - opt_pos + 1, opt_pos); - return 0; + return VMDK_OK; } /* Open an extent file and append to bs array */ @@ -772,7 +791,7 @@ static int get_whole_cluster(BlockDriverState *bs, int ret; if (!vmdk_is_cid_valid(bs)) { - return -1; + return VMDK_ERROR; } /* floor offset to cluster */ @@ -780,30 +799,31 @@ static int get_whole_cluster(BlockDriverState *bs, ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain, extent->cluster_sectors); if (ret < 0) { - return -1; + return VMDK_ERROR; } /* Write grain only into the active image */ ret = bdrv_write(extent->file, cluster_offset, whole_grain, extent->cluster_sectors); if (ret < 0) { - return -1; + return VMDK_ERROR; } } - return 0; + return VMDK_OK; } static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) { + uint32_t offset; + QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset)); + offset = cpu_to_le32(m_data->offset); /* update L2 table */ if (bdrv_pwrite_sync( extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), - sizeof(m_data->offset) - ) < 0) { - return -1; + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; } /* update backup L2 table */ if (extent->l1_backup_table_offset != 0) { @@ -812,13 +832,15 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data) extent->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), sizeof(m_data->offset) - ) < 0) { - return -1; + &offset, sizeof(offset)) < 0) { + return VMDK_ERROR; } } + if (m_data->l2_cache_entry) { + *m_data->l2_cache_entry = offset; + } - return 0; + return VMDK_OK; } static int get_cluster_offset(BlockDriverState *bs, @@ -830,24 +852,25 @@ static int get_cluster_offset(BlockDriverState *bs, { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; - uint32_t min_count, *l2_table, tmp = 0; + uint32_t min_count, *l2_table; + bool zeroed = false; if (m_data) { m_data->valid = 0; } if (extent->flat) { *cluster_offset = extent->flat_start_offset; - return 0; + return VMDK_OK; } offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; l1_index = (offset >> 9) / extent->l1_entry_sectors; if (l1_index >= extent->l1_size) { - return -1; + return VMDK_ERROR; } l2_offset = extent->l1_table[l1_index]; if (!l2_offset) { - return -1; + return VMDK_UNALLOC; } for (i = 0; i < L2_CACHE_SIZE; i++) { if (l2_offset == extent->l2_cache_offsets[i]) { @@ -877,7 +900,7 @@ static int get_cluster_offset(BlockDriverState *bs, l2_table, extent->l2_size * sizeof(uint32_t) ) != extent->l2_size * sizeof(uint32_t)) { - return -1; + return VMDK_ERROR; } extent->l2_cache_offsets[min_index] = l2_offset; @@ -886,9 +909,21 @@ static int get_cluster_offset(BlockDriverState *bs, l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; *cluster_offset = le32_to_cpu(l2_table[l2_index]); - if (!*cluster_offset) { + if (m_data) { + m_data->valid = 1; + m_data->l1_index = l1_index; + m_data->l2_index = l2_index; + m_data->offset = *cluster_offset; + m_data->l2_offset = l2_offset; + m_data->l2_cache_entry = &l2_table[l2_index]; + } + if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) { + zeroed = true; + } + + if (!*cluster_offset || zeroed) { if (!allocate) { - return -1; + return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; } /* Avoid the L2 tables update for the images that have snapshots. */ @@ -901,8 +936,7 @@ static int get_cluster_offset(BlockDriverState *bs, } *cluster_offset >>= 9; - tmp = cpu_to_le32(*cluster_offset); - l2_table[l2_index] = tmp; + l2_table[l2_index] = cpu_to_le32(*cluster_offset); /* First of all we write grain itself, to avoid race condition * that may to corrupt the image. @@ -911,19 +945,15 @@ static int get_cluster_offset(BlockDriverState *bs, */ if (get_whole_cluster( bs, extent, *cluster_offset, offset, allocate) == -1) { - return -1; + return VMDK_ERROR; } if (m_data) { - m_data->offset = tmp; - m_data->l1_index = l1_index; - m_data->l2_index = l2_index; - m_data->l2_offset = l2_offset; - m_data->valid = 1; + m_data->offset = *cluster_offset; } } *cluster_offset <<= 9; - return 0; + return VMDK_OK; } static VmdkExtent *find_extent(BDRVVmdkState *s, @@ -959,8 +989,8 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, ret = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0, &offset); qemu_co_mutex_unlock(&s->lock); - /* get_cluster_offset returning 0 means success */ - ret = !ret; + + ret = (ret == VMDK_OK || ret == VMDK_ZEROED); index_in_cluster = sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; @@ -1103,9 +1133,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, if (n > nb_sectors) { n = nb_sectors; } - if (ret) { + if (ret != VMDK_OK) { /* if not allocated, try to read from parent image, if exist */ - if (bs->backing_hd) { + if (bs->backing_hd && ret != VMDK_ZEROED) { if (!vmdk_is_cid_valid(bs)) { return -EINVAL; } @@ -1142,8 +1172,17 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num, return ret; } +/** + * vmdk_write: + * @zeroed: buf is ignored (data is zero), use zeroed_grain GTE feature + * if possible, otherwise return -ENOTSUP. + * @zero_dry_run: used for zeroed == true only, don't update L2 table, just + * + * Returns: error code with 0 for success. + */ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) + const uint8_t *buf, int nb_sectors, + bool zeroed, bool zero_dry_run) { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; @@ -1173,7 +1212,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, sector_num << 9, !extent->compressed, &cluster_offset); if (extent->compressed) { - if (ret == 0) { + if (ret == VMDK_OK) { /* Refuse write to allocated cluster for streamOptimized */ fprintf(stderr, "VMDK: can't write to allocated cluster" @@ -1189,7 +1228,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, &cluster_offset); } } - if (ret) { + if (ret == VMDK_ERROR) { return -EINVAL; } extent_begin_sector = extent->end_sector - extent->sectors; @@ -1199,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (n > nb_sectors) { n = nb_sectors; } - - ret = vmdk_write_extent(extent, - cluster_offset, index_in_cluster * 512, - buf, n, sector_num); - if (ret) { - return ret; - } - if (m_data.valid) { - /* update L2 tables */ - if (vmdk_L2update(extent, &m_data) == -1) { - return -EIO; + if (zeroed) { + /* Do zeroed write, buf is ignored */ + if (extent->has_zero_grain && + index_in_cluster == 0 && + n >= extent->cluster_sectors) { + n = extent->cluster_sectors; + if (!zero_dry_run) { + m_data.offset = VMDK_GTE_ZEROED; + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + return -EIO; + } + } + } else { + return -ENOTSUP; + } + } else { + ret = vmdk_write_extent(extent, + cluster_offset, index_in_cluster * 512, + buf, n, sector_num); + if (ret) { + return ret; + } + if (m_data.valid) { + /* update L2 tables */ + if (vmdk_L2update(extent, &m_data) != VMDK_OK) { + return -EIO; + } } } nb_sectors -= n; @@ -1235,14 +1291,29 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num, int ret; BDRVVmdkState *s = bs->opaque; qemu_co_mutex_lock(&s->lock); - ret = vmdk_write(bs, sector_num, buf, nb_sectors); + ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false); + qemu_co_mutex_unlock(&s->lock); + return ret; +} + +static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors) +{ + int ret; + BDRVVmdkState *s = bs->opaque; + qemu_co_mutex_lock(&s->lock); + ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true); + if (!ret) { + ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false); + } qemu_co_mutex_unlock(&s->lock); return ret; } static int vmdk_create_extent(const char *filename, int64_t filesize, - bool flat, bool compress) + bool flat, bool compress, bool zeroed_grain) { int ret, i; int fd = 0; @@ -1264,9 +1335,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } magic = cpu_to_be32(VMDK4_MAGIC); memset(&header, 0, sizeof(header)); - header.version = 1; - header.flags = - 3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0); + header.version = zeroed_grain ? 2 : 1; + header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT + | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) + | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0; header.capacity = filesize / 512; header.granularity = 128; @@ -1357,7 +1429,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, if (filename == NULL || !strlen(filename)) { fprintf(stderr, "Vmdk: no filename provided.\n"); - return -1; + return VMDK_ERROR; } p = strrchr(filename, '/'); if (p == NULL) { @@ -1369,7 +1441,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, if (p != NULL) { p++; if (p - filename >= buf_len) { - return -1; + return VMDK_ERROR; } pstrcpy(path, p - filename + 1, filename); } else { @@ -1382,12 +1454,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix, postfix[0] = '\0'; } else { if (q - p >= buf_len) { - return -1; + return VMDK_ERROR; } pstrcpy(prefix, q - p + 1, p); pstrcpy(postfix, buf_len, q); } - return 0; + return VMDK_OK; } static int relative_path(char *dest, int dest_size, @@ -1403,11 +1475,11 @@ static int relative_path(char *dest, int dest_size, #endif if (!(dest && base && target)) { - return -1; + return VMDK_ERROR; } if (path_is_absolute(target)) { pstrcpy(dest, dest_size, target); - return 0; + return VMDK_OK; } while (base[i] == target[i]) { i++; @@ -1426,7 +1498,7 @@ static int relative_path(char *dest, int dest_size, pstrcat(dest, dest_size, sep); } pstrcat(dest, dest_size, q); - return 0; + return VMDK_OK; } static int vmdk_create(const char *filename, QEMUOptionParameter *options) @@ -1447,6 +1519,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) char parent_desc_line[BUF_SIZE] = ""; uint32_t parent_cid = 0xffffffff; uint32_t number_heads = 16; + bool zeroed_grain = false; const char desc_template[] = "# Disk DescriptorFile\n" "version=1\n" @@ -1482,6 +1555,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0; } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) { fmt = options->value.s; + } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) { + zeroed_grain |= options->value.n; } options++; } @@ -1568,7 +1643,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) snprintf(ext_filename, sizeof(ext_filename), "%s%s", path, desc_filename); - if (vmdk_create_extent(ext_filename, size, flat, compress)) { + if (vmdk_create_extent(ext_filename, size, + flat, compress, zeroed_grain)) { return -EINVAL; } filesize -= size; @@ -1694,6 +1770,11 @@ static QEMUOptionParameter vmdk_create_options[] = { "VMDK flat extent format, can be one of " "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} " }, + { + .name = BLOCK_OPT_ZEROED_GRAIN, + .type = OPT_FLAG, + .help = "Enable efficient zero writes using the zeroed-grain GTE feature" + }, { NULL } }; @@ -1705,6 +1786,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_read = vmdk_co_read, .bdrv_write = vmdk_co_write, + .bdrv_co_write_zeroes = vmdk_co_write_zeroes, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, .bdrv_co_flush_to_disk = vmdk_co_flush, diff --git a/blockdev.c b/blockdev.c index 6e293e955..7c9d8dd0a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1118,6 +1118,7 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data) void qmp_block_resize(const char *device, int64_t size, Error **errp) { BlockDriverState *bs; + int ret; bs = bdrv_find(device); if (!bs) { @@ -1133,7 +1134,8 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp) /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); - switch (bdrv_truncate(bs, size)) { + ret = bdrv_truncate(bs, size); + switch (ret) { case 0: break; case -ENOMEDIUM: @@ -1149,7 +1151,7 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp) error_set(errp, QERR_DEVICE_IN_USE, device); break; default: - error_set(errp, QERR_UNDEFINED_ERROR); + error_setg_errno(errp, -ret, "Could not resize"); break; } } diff --git a/include/block/nbd.h b/include/block/nbd.h index 0903d7a60..c90f5e4d9 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -58,7 +58,8 @@ enum { #define NBD_DEFAULT_PORT 10809 -#define NBD_BUFFER_SIZE (1024*1024) +/* Maximum size of a single READ/WRITE data buffer */ +#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024) ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); int tcp_socket_incoming(const char *address, uint16_t port); diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h new file mode 100644 index 000000000..56d1c3bfd --- /dev/null +++ b/include/qemu/crc32c.h @@ -0,0 +1,35 @@ +/* + * Castagnoli CRC32C Checksum Algorithm + * + * Polynomial: 0x11EDC6F41 + * + * Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman + * "Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits",IEEE Transactions on Communication, + * Volume 41, Number 6, June 1993 + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * Based on the Linux kernel cryptographic crc32c module, + * + * Copyright (c) 2004 Cisco Systems, Inc. + * Copyright (c) 2008 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef QEMU_CRC32_H +#define QEMU_CRC32_H + +#include "qemu-common.h" + +uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length); + +#endif diff --git a/nbd.c b/nbd.c index 85187fff9..2606403a4 100644 --- a/nbd.c +++ b/nbd.c @@ -98,7 +98,6 @@ struct NBDExport { off_t size; uint32_t nbdflags; QTAILQ_HEAD(, NBDClient) clients; - QSIMPLEQ_HEAD(, NBDRequest) requests; QTAILQ_ENTRY(NBDExport) next; }; @@ -845,18 +844,11 @@ void nbd_client_close(NBDClient *client) static NBDRequest *nbd_request_get(NBDClient *client) { NBDRequest *req; - NBDExport *exp = client->exp; assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); client->nb_requests++; - if (QSIMPLEQ_EMPTY(&exp->requests)) { - req = g_malloc0(sizeof(NBDRequest)); - req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE); - } else { - req = QSIMPLEQ_FIRST(&exp->requests); - QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry); - } + req = g_slice_new0(NBDRequest); nbd_client_get(client); req->client = client; return req; @@ -865,7 +857,12 @@ static NBDRequest *nbd_request_get(NBDClient *client) static void nbd_request_put(NBDRequest *req) { NBDClient *client = req->client; - QSIMPLEQ_INSERT_HEAD(&client->exp->requests, req, entry); + + if (req->data) { + qemu_vfree(req->data); + } + g_slice_free(NBDRequest, req); + if (client->nb_requests-- == MAX_NBD_REQUESTS) { qemu_notify_event(); } @@ -877,7 +874,6 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, void (*close)(NBDExport *)) { NBDExport *exp = g_malloc0(sizeof(NBDExport)); - QSIMPLEQ_INIT(&exp->requests); exp->refcount = 1; QTAILQ_INIT(&exp->clients); exp->bs = bs; @@ -953,13 +949,6 @@ void nbd_export_put(NBDExport *exp) exp->close(exp); } - while (!QSIMPLEQ_EMPTY(&exp->requests)) { - NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests); - QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry); - qemu_vfree(first->data); - g_free(first); - } - g_free(exp); } } @@ -1018,6 +1007,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque { NBDClient *client = req->client; int csock = client->sock; + uint32_t command; ssize_t rc; client->recv_coroutine = qemu_coroutine_self(); @@ -1029,9 +1019,9 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque goto out; } - if (request->len > NBD_BUFFER_SIZE) { + if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%u) is larger than max len (%u)", - request->len, NBD_BUFFER_SIZE); + request->len, NBD_MAX_BUFFER_SIZE); rc = -EINVAL; goto out; } @@ -1045,7 +1035,11 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque TRACE("Decoding type"); - if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) { + command = request->type & NBD_CMD_MASK_COMMAND; + if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { + req->data = qemu_blockalign(client->exp->bs, request->len); + } + if (command == NBD_CMD_WRITE) { TRACE("Reading %u byte(s)", request->len); if (qemu_co_recv(csock, req->data, request->len) != request->len) { diff --git a/net/net.c b/net/net.c index 7869161d8..43a74e43a 100644 --- a/net/net.c +++ b/net/net.c @@ -157,8 +157,7 @@ void qemu_macaddr_default_if_unset(MACAddr *macaddr) /** * Generate a name for net client * - * Only net clients created with the legacy -net option need this. Naming is - * mandatory for net clients created with -netdev. + * Only net clients created with the legacy -net option and NICs need this. */ static char *assign_name(NetClientState *nc1, const char *model) { @@ -170,9 +169,7 @@ static char *assign_name(NetClientState *nc1, const char *model) if (nc == nc1) { continue; } - /* For compatibility only bump id for net clients on a vlan */ - if (strcmp(nc->model, model) == 0 && - net_hub_id_for_client(nc, NULL) == 0) { + if (strcmp(nc->model, model) == 0) { id++; } } diff --git a/net/tap.c b/net/tap.c index 17bdf014a..e0b7a2a5a 100644 --- a/net/tap.c +++ b/net/tap.c @@ -623,7 +623,7 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, vhostfdname || (tap->has_vhostforce && tap->vhostforce)) { int vhostfd; - if (tap->has_vhostfd) { + if (tap->has_vhostfd || tap->has_vhostfds) { vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname); if (vhostfd == -1) { return -1; diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c index f975da08c..805247d25 100644 --- a/target-mips/dsp_helper.c +++ b/target-mips/dsp_helper.c @@ -682,49 +682,31 @@ static inline uint8_t mipsdsp_sat8_reduce_precision(uint16_t a, static inline uint8_t mipsdsp_lshift8(uint8_t a, uint8_t s, CPUMIPSState *env) { - uint8_t sign; uint8_t discard; - if (s == 0) { - return a; - } else { - sign = (a >> 7) & 0x01; - if (sign != 0) { - discard = (((0x01 << (8 - s)) - 1) << s) | - ((a >> (6 - (s - 1))) & ((0x01 << s) - 1)); - } else { - discard = a >> (6 - (s - 1)); - } + if (s != 0) { + discard = a >> (8 - s); if (discard != 0x00) { set_DSPControl_overflow_flag(1, 22, env); } - return a << s; } + return a << s; } static inline uint16_t mipsdsp_lshift16(uint16_t a, uint8_t s, CPUMIPSState *env) { - uint8_t sign; uint16_t discard; - if (s == 0) { - return a; - } else { - sign = (a >> 15) & 0x01; - if (sign != 0) { - discard = (((0x01 << (16 - s)) - 1) << s) | - ((a >> (14 - (s - 1))) & ((0x01 << s) - 1)); - } else { - discard = a >> (14 - (s - 1)); - } + if (s != 0) { + discard = (int16_t)a >> (15 - s); if ((discard != 0x0000) && (discard != 0xFFFF)) { set_DSPControl_overflow_flag(1, 22, env); } - return a << s; } + return a << s; } diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 7216f7a63..3d434124a 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1190,7 +1190,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, TCG_REG_R2, tlb_offset, 1, 1); if (TARGET_LONG_BITS == 64) { - tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, + tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1, TCG_REG_R2, 4, 1, 0); } } @@ -1611,17 +1611,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - { + if (use_armv7_instructions || check_fit_imm(args[0])) { + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); + } else { uint8_t *ld_ptr = s->code_ptr; - if (args[0] >> 8) - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); - else - tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]); + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); - if (args[0] >> 8) { - *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; - tcg_out32(s, args[0]); - } + *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; + tcg_out32(s, args[0]); } break; case INDEX_op_goto_tb: diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index a536bf70a..31eb62b60 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -127,6 +127,9 @@ _make_test_img() -e "s# compat='[^']*'##g" \ -e "s# compat6=\\(on\\|off\\)##g" \ -e "s# static=\\(on\\|off\\)##g" \ + -e "s# zeroed_grain=\\(on\\|off\\)##g" \ + -e "s# subformat='[^']*'##g" \ + -e "s# adapter_type='[^']*'##g" \ -e "s# lazy_refcounts=\\(on\\|off\\)##g" # Start an NBD server on the image file, which is what we'll be talking to diff --git a/tests/tcg/mips/mips32-dsp/shll_ph.c b/tests/tcg/mips/mips32-dsp/shll_ph.c index b8f1ff528..5fa58ccf6 100644 --- a/tests/tcg/mips/mips32-dsp/shll_ph.c +++ b/tests/tcg/mips/mips32-dsp/shll_ph.c @@ -11,7 +11,38 @@ int main() resultdsp = 1; __asm - ("shll.ph %0, %2, 0x0B\n\t" + ("wrdsp $0\n\t" + "shll.ph %0, %2, 0x0B\n\t" + "rddsp %1\n\t" + : "=r"(rd), "=r"(dsp) + : "r"(rt) + ); + dsp = (dsp >> 22) & 0x01; + assert(dsp == resultdsp); + assert(rd == result); + + rt = 0x7fff8000; + result = 0xfffe0000; + resultdsp = 1; + + __asm + ("wrdsp $0\n\t" + "shll.ph %0, %2, 0x01\n\t" + "rddsp %1\n\t" + : "=r"(rd), "=r"(dsp) + : "r"(rt) + ); + dsp = (dsp >> 22) & 0x01; + assert(dsp == resultdsp); + assert(rd == result); + + rt = 0x00000001; + result = 0x00008000; + resultdsp = 1; + + __asm + ("wrdsp $0\n\t" + "shll.ph %0, %2, 0x0F\n\t" "rddsp %1\n\t" : "=r"(rd), "=r"(dsp) : "r"(rt) diff --git a/tests/tcg/mips/mips32-dsp/shll_qb.c b/tests/tcg/mips/mips32-dsp/shll_qb.c index 8c1b91c63..729716d62 100644 --- a/tests/tcg/mips/mips32-dsp/shll_qb.c +++ b/tests/tcg/mips/mips32-dsp/shll_qb.c @@ -11,12 +11,14 @@ int main() resultdsp = 0x00; __asm - ("shll.qb %0, %2, 0x00\n\t" + ("wrdsp $0\n\t" + "shll.qb %0, %2, 0x00\n\t" "rddsp %1\n\t" : "=r"(rd), "=r"(dsp) : "r"(rt) ); dsp = (dsp >> 22) & 0x01; + assert(dsp == resultdsp); assert(rd == result); rt = 0x87654321; @@ -24,12 +26,29 @@ int main() resultdsp = 0x01; __asm - ("shll.qb %0, %2, 0x03\n\t" + ("wrdsp $0\n\t" + "shll.qb %0, %2, 0x03\n\t" "rddsp %1\n\t" : "=r"(rd), "=r"(dsp) : "r"(rt) ); dsp = (dsp >> 22) & 0x01; + assert(dsp == resultdsp); + assert(rd == result); + + rt = 0x00000001; + result = 0x00000080; + resultdsp = 0x00; + + __asm + ("wrdsp $0\n\t" + "shll.qb %0, %2, 0x07\n\t" + "rddsp %1\n\t" + : "=r"(rd), "=r"(dsp) + : "r"(rt) + ); + dsp = (dsp >> 22) & 0x01; + assert(dsp == resultdsp); assert(rd == result); return 0; diff --git a/util/Makefile.objs b/util/Makefile.objs index c5652f526..4a1bd4ee3 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -10,3 +10,4 @@ util-obj-$(CONFIG_POSIX) += compatfd.o util-obj-y += iov.o aes.o qemu-config.o qemu-sockets.o uri.o notify.o util-obj-y += qemu-option.o qemu-progress.o util-obj-y += hexdump.o +util-obj-y += crc32c.o diff --git a/util/crc32c.c b/util/crc32c.c new file mode 100644 index 000000000..886632780 --- /dev/null +++ b/util/crc32c.c @@ -0,0 +1,115 @@ +/* + * Castagnoli CRC32C Checksum Algorithm + * + * Polynomial: 0x11EDC6F41 + * + * Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman + * "Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits",IEEE Transactions on Communication, + * Volume 41, Number 6, June 1993 + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody + * + * Based on the Linux kernel cryptographic crc32c module, + * + * Copyright (c) 2004 Cisco Systems, Inc. + * Copyright (c) 2008 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include "qemu-common.h" +#include "qemu/crc32c.h" + +/* + * This is the CRC-32C table + * Generated with: + * width = 32 bits + * poly = 0x1EDC6F41 + * reflect input bytes = true + * reflect output bytes = true + */ + +static const uint32_t crc32c_table[256] = { + 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, + 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, + 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, + 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, + 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, + 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, + 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, + 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, + 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, + 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, + 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, + 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, + 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, + 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, + 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, + 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, + 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, + 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, + 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, + 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, + 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, + 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, + 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, + 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, + 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, + 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, + 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, + 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, + 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, + 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, + 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, + 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, + 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, + 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, + 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, + 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, + 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, + 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, + 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, + 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, + 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, + 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, + 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, + 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, + 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, + 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, + 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, + 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, + 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, + 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, + 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, + 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, + 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, + 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, + 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, + 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, + 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, + 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, + 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, + 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, + 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, + 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, + 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, + 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L +}; + + +uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length) +{ + while (length--) { + crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); + } + return crc^0xffffffff; +} +