]> git.proxmox.com Git - mirror_qemu.git/blame - block/cow.c
COW: Speed up writes
[mirror_qemu.git] / block / cow.c
CommitLineData
ea2384d3
FB
1/*
2 * Block driver for the COW format
5fafdf24 3 *
ea2384d3 4 * Copyright (c) 2004 Fabrice Bellard
5fafdf24 5 *
ea2384d3
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
737e150e 25#include "block/block_int.h"
1de7afc9 26#include "qemu/module.h"
ea2384d3
FB
27
28/**************************************************************/
29/* COW block driver using file system holes */
30
/* On-disk format shared with user mode linux COW files. */
#define COW_MAGIC 0x4f4f4f4d /* MOOO */
#define COW_VERSION 2

/*
 * Header stored at offset 0 of a COW image.  The driver reads and writes
 * the integer fields in big-endian byte order (see the be32/be64
 * conversions in cow_open() and cow_create()).
 *
 * NOTE(review): the struct is not declared packed, so the offset of
 * 'size' (and sizeof the struct) follows the host ABI's alignment for
 * uint64_t -- confirm this matches the images you need to interoperate
 * with.
 */
struct cow_header_v2 {
    uint32_t magic;              /* COW_MAGIC */
    uint32_t version;            /* COW_VERSION */
    char backing_file[1024];     /* name of the base image, may be empty */
    int32_t mtime;               /* mtime of the backing file, 0 if unknown */
    uint64_t size;               /* virtual disk size in bytes */
    uint32_t sectorsize;         /* cow_create() always stores 512 */
};
43
44typedef struct BDRVCowState {
848c66e8 45 CoMutex lock;
ea2384d3
FB
46 int64_t cow_sectors_offset;
47} BDRVCowState;
48
49static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
50{
51 const struct cow_header_v2 *cow_header = (const void *)buf;
52
712e7874
FB
53 if (buf_size >= sizeof(struct cow_header_v2) &&
54 be32_to_cpu(cow_header->magic) == COW_MAGIC &&
5fafdf24 55 be32_to_cpu(cow_header->version) == COW_VERSION)
ea2384d3
FB
56 return 100;
57 else
58 return 0;
59}
60
015a1036
HR
61static int cow_open(BlockDriverState *bs, QDict *options, int flags,
62 Error **errp)
ea2384d3
FB
63{
64 BDRVCowState *s = bs->opaque;
ea2384d3 65 struct cow_header_v2 cow_header;
893a9cb4 66 int bitmap_size;
ea2384d3 67 int64_t size;
16d2fc00 68 int ret;
ea2384d3 69
ea2384d3 70 /* see if it is a cow image */
16d2fc00
LZH
71 ret = bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header));
72 if (ret < 0) {
73 goto fail;
74 }
75
76 if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
15bac0d5 77 ret = -EMEDIUMTYPE;
ea2384d3
FB
78 goto fail;
79 }
80
16d2fc00
LZH
81 if (be32_to_cpu(cow_header.version) != COW_VERSION) {
82 char version[64];
83 snprintf(version, sizeof(version),
84 "COW version %d", cow_header.version);
85 qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
86 bs->device_name, "cow", version);
87 ret = -ENOTSUP;
ea2384d3
FB
88 goto fail;
89 }
3b46e624 90
ea2384d3
FB
91 /* cow image found */
92 size = be64_to_cpu(cow_header.size);
93 bs->total_sectors = size / 512;
94
5fafdf24 95 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
ea2384d3 96 cow_header.backing_file);
3b46e624 97
893a9cb4
CH
98 bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
99 s->cow_sectors_offset = (bitmap_size + 511) & ~511;
848c66e8 100 qemu_co_mutex_init(&s->lock);
ea2384d3
FB
101 return 0;
102 fail:
16d2fc00 103 return ret;
ea2384d3
FB
104}
105
/*
 * Set @nb_sectors consecutive bits in @bitmap, beginning at bit @start.
 * Bit N lives in byte N / 8 at position N % 8 (least significant first).
 * Whole, byte-aligned groups of eight bits are written as 0xFF in one
 * store; the remaining bits are OR-ed in individually.
 */
static inline void cow_set_bits(uint8_t *bitmap, int start, int64_t nb_sectors)
{
    int64_t pos = start;
    const int64_t end = start + nb_sectors;

    while (pos < end) {
        if ((pos & 7) != 0 || pos + 8 > end) {
            /* unaligned, or fewer than eight bits left */
            bitmap[pos / 8] |= (1 << (pos % 8));
            pos++;
        } else {
            /* aligned full byte */
            bitmap[pos / 8] = 0xFF;
            pos += 8;
        }
    }
}
119
276cbc7f
PB
/* Number of allocation bits held by one 512-byte bitmap sector. */
#define BITS_PER_BITMAP_SECTOR (512 * 8)

/*
 * Return 1 if bit @bitnum of @bitmap is set, 0 otherwise.
 * Bits are numbered byte by byte, least significant bit first.
 * Cannot use bitmap.c on big-endian machines.
 */
static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
{
    const uint8_t byte = bitmap[bitnum / 8];
    const int mask = 1 << (bitnum & 7);

    return (byte & mask) ? 1 : 0;
}
ea2384d3 127
276cbc7f
PB
128static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
129{
130 int streak_value = value ? 0xFF : 0;
131 int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
132 int bitnum = start;
133 while (bitnum < last) {
134 if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
135 bitnum += 8;
136 continue;
137 }
138 if (cow_test_bit(bitnum, bitmap) == value) {
139 bitnum++;
140 continue;
141 }
142 break;
893a9cb4 143 }
276cbc7f 144 return MIN(bitnum, last) - start;
893a9cb4 145}
ea2384d3
FB
146
147/* Return true if first block has been changed (ie. current version is
148 * in COW file). Set the number of continuous blocks for which that
149 * is true. */
81145834
SH
150static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
151 int64_t sector_num, int nb_sectors, int *num_same)
ea2384d3 152{
276cbc7f
PB
153 int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
154 uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
155 uint8_t bitmap[BDRV_SECTOR_SIZE];
156 int ret;
ea2384d3
FB
157 int changed;
158
276cbc7f
PB
159 ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
160 if (ret < 0) {
161 return ret;
ea2384d3
FB
162 }
163
276cbc7f
PB
164 bitnum &= BITS_PER_BITMAP_SECTOR - 1;
165 changed = cow_test_bit(bitnum, bitmap);
166 *num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors);
ea2384d3
FB
167 return changed;
168}
169
b6b8a333
PB
170static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
171 int64_t sector_num, int nb_sectors, int *num_same)
172{
4bc74be9
PB
173 BDRVCowState *s = bs->opaque;
174 int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
175 int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
176 if (ret < 0) {
177 return ret;
178 }
179 return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
b6b8a333
PB
180}
181
893a9cb4
CH
182static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
183 int nb_sectors)
ea2384d3 184{
14b98fda
CS
185 int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
186 uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
26ae9804 187 bool first = true;
14b98fda
CS
188 int sector_bits;
189
190 for ( ; nb_sectors;
191 bitnum += sector_bits,
192 nb_sectors -= sector_bits,
193 offset += BDRV_SECTOR_SIZE) {
194 int ret, set;
195 uint8_t bitmap[BDRV_SECTOR_SIZE];
196
197 bitnum &= BITS_PER_BITMAP_SECTOR - 1;
198 sector_bits = MIN(nb_sectors, BITS_PER_BITMAP_SECTOR - bitnum);
199
200 ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
201 if (ret < 0) {
202 return ret;
203 }
204
205 /* Skip over any already set bits */
206 set = cow_find_streak(bitmap, 1, bitnum, sector_bits);
207 bitnum += set;
208 sector_bits -= set;
209 nb_sectors -= set;
210 if (!sector_bits) {
211 continue;
212 }
213
214 if (first) {
215 ret = bdrv_flush(bs->file);
216 if (ret < 0) {
217 return ret;
218 }
219 first = false;
220 }
221
222 cow_set_bits(bitmap, bitnum, sector_bits);
893a9cb4 223
14b98fda
CS
224 ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
225 if (ret < 0) {
226 return ret;
893a9cb4
CH
227 }
228 }
229
14b98fda 230 return 0;
ea2384d3
FB
231}
232
e94d1387
SH
233static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
234 uint8_t *buf, int nb_sectors)
ea2384d3
FB
235{
236 BDRVCowState *s = bs->opaque;
237 int ret, n;
3b46e624 238
ea2384d3 239 while (nb_sectors > 0) {
d663640c
PB
240 ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
241 if (ret < 0) {
242 return ret;
243 }
244 if (ret) {
2063392a
CH
245 ret = bdrv_pread(bs->file,
246 s->cow_sectors_offset + sector_num * 512,
247 buf, n * 512);
16d2fc00
LZH
248 if (ret < 0) {
249 return ret;
250 }
ea2384d3 251 } else {
83f64091
FB
252 if (bs->backing_hd) {
253 /* read from the base image */
254 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
16d2fc00
LZH
255 if (ret < 0) {
256 return ret;
257 }
83f64091 258 } else {
16d2fc00
LZH
259 memset(buf, 0, n * 512);
260 }
83f64091 261 }
ea2384d3
FB
262 nb_sectors -= n;
263 sector_num += n;
264 buf += n * 512;
265 }
266 return 0;
267}
268
2914caa0
PB
269static coroutine_fn int cow_co_read(BlockDriverState *bs, int64_t sector_num,
270 uint8_t *buf, int nb_sectors)
271{
272 int ret;
273 BDRVCowState *s = bs->opaque;
274 qemu_co_mutex_lock(&s->lock);
275 ret = cow_read(bs, sector_num, buf, nb_sectors);
276 qemu_co_mutex_unlock(&s->lock);
277 return ret;
278}
279
5fafdf24 280static int cow_write(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
281 const uint8_t *buf, int nb_sectors)
282{
283 BDRVCowState *s = bs->opaque;
893a9cb4 284 int ret;
3b46e624 285
2063392a
CH
286 ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512,
287 buf, nb_sectors * 512);
16d2fc00
LZH
288 if (ret < 0) {
289 return ret;
290 }
893a9cb4
CH
291
292 return cow_update_bitmap(bs, sector_num, nb_sectors);
ea2384d3
FB
293}
294
e183ef75
PB
295static coroutine_fn int cow_co_write(BlockDriverState *bs, int64_t sector_num,
296 const uint8_t *buf, int nb_sectors)
297{
298 int ret;
299 BDRVCowState *s = bs->opaque;
300 qemu_co_mutex_lock(&s->lock);
301 ret = cow_write(bs, sector_num, buf, nb_sectors);
302 qemu_co_mutex_unlock(&s->lock);
303 return ret;
304}
305
e2731add 306static void cow_close(BlockDriverState *bs)
ea2384d3 307{
ea2384d3
FB
308}
309
d5124c00
HR
310static int cow_create(const char *filename, QEMUOptionParameter *options,
311 Error **errp)
ea2384d3 312{
ea2384d3
FB
313 struct cow_header_v2 cow_header;
314 struct stat st;
0e7e1989
KW
315 int64_t image_sectors = 0;
316 const char *image_filename = NULL;
34b5d2c6 317 Error *local_err = NULL;
31f38120 318 int ret;
3535a9c6 319 BlockDriverState *cow_bs;
0e7e1989
KW
320
321 /* Read out options */
322 while (options && options->name) {
323 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
324 image_sectors = options->value.n / 512;
325 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
326 image_filename = options->value.s;
327 }
328 options++;
329 }
ea2384d3 330
cc84d90f 331 ret = bdrv_create_file(filename, options, &local_err);
3535a9c6 332 if (ret < 0) {
cc84d90f
HR
333 qerror_report_err(local_err);
334 error_free(local_err);
3535a9c6
LZH
335 return ret;
336 }
337
34b5d2c6 338 ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
3535a9c6 339 if (ret < 0) {
34b5d2c6
HR
340 qerror_report_err(local_err);
341 error_free(local_err);
3535a9c6
LZH
342 return ret;
343 }
344
ea2384d3
FB
345 memset(&cow_header, 0, sizeof(cow_header));
346 cow_header.magic = cpu_to_be32(COW_MAGIC);
347 cow_header.version = cpu_to_be32(COW_VERSION);
348 if (image_filename) {
83f64091
FB
349 /* Note: if no file, we put a dummy mtime */
350 cow_header.mtime = cpu_to_be32(0);
351
3535a9c6 352 if (stat(image_filename, &st) != 0) {
83f64091 353 goto mtime_fail;
ea2384d3 354 }
ea2384d3 355 cow_header.mtime = cpu_to_be32(st.st_mtime);
83f64091
FB
356 mtime_fail:
357 pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
358 image_filename);
ea2384d3
FB
359 }
360 cow_header.sectorsize = cpu_to_be32(512);
361 cow_header.size = cpu_to_be64(image_sectors * 512);
3535a9c6 362 ret = bdrv_pwrite(cow_bs, 0, &cow_header, sizeof(cow_header));
16d2fc00 363 if (ret < 0) {
31f38120
KS
364 goto exit;
365 }
366
ea2384d3 367 /* resize to include at least all the bitmap */
3535a9c6
LZH
368 ret = bdrv_truncate(cow_bs,
369 sizeof(cow_header) + ((image_sectors + 7) >> 3));
16d2fc00 370 if (ret < 0) {
31f38120
KS
371 goto exit;
372 }
373
31f38120 374exit:
4f6fd349 375 bdrv_unref(cow_bs);
31f38120 376 return ret;
ea2384d3
FB
377}
378
0e7e1989 379static QEMUOptionParameter cow_create_options[] = {
db08adf5
KW
380 {
381 .name = BLOCK_OPT_SIZE,
382 .type = OPT_SIZE,
383 .help = "Virtual disk size"
384 },
385 {
386 .name = BLOCK_OPT_BACKING_FILE,
387 .type = OPT_STRING,
388 .help = "File name of a base image"
389 },
0e7e1989
KW
390 { NULL }
391};
392
5efa9d5a 393static BlockDriver bdrv_cow = {
c68b89ac
KW
394 .format_name = "cow",
395 .instance_size = sizeof(BDRVCowState),
396
397 .bdrv_probe = cow_probe,
398 .bdrv_open = cow_open,
399 .bdrv_close = cow_close,
400 .bdrv_create = cow_create,
3ac21627 401 .bdrv_has_zero_init = bdrv_has_zero_init_1,
c68b89ac
KW
402
403 .bdrv_read = cow_co_read,
404 .bdrv_write = cow_co_write,
b6b8a333 405 .bdrv_co_get_block_status = cow_co_get_block_status,
0e7e1989
KW
406
407 .create_options = cow_create_options,
ea2384d3 408};
5efa9d5a
AL
409
410static void bdrv_cow_init(void)
411{
412 bdrv_register(&bdrv_cow);
413}
414
415block_init(bdrv_cow_init);