]>
Commit | Line | Data |
---|---|---|
27cec15e BC |
1 | /* |
2 | * Quorum Block filter | |
3 | * | |
4 | * Copyright (C) 2012-2014 Nodalink, EURL. | |
5 | * | |
6 | * Author: | |
7 | * Benoît Canet <benoit.canet@irqsave.net> | |
8 | * | |
9 | * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp) | |
10 | * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc). | |
11 | * | |
12 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
13 | * See the COPYING file in the top-level directory. | |
14 | */ | |
15 | ||
95c6bff3 BC |
16 | #include <gnutls/gnutls.h> |
17 | #include <gnutls/crypto.h> | |
27cec15e | 18 | #include "block/block_int.h" |
95c6bff3 BC |
19 | #include "qapi/qmp/qjson.h" |
20 | ||
#define HASH_LENGTH 32

/* Value a vote is cast for.
 * Both members share the same storage; the compare callback stored in
 * QuorumVotes decides which member is looked at.
 */
typedef union QuorumVoteValue {
    /* SHA-256 hash */
    char h[HASH_LENGTH];
    /* simpler 64 bits hash */
    int64_t l;
} QuorumVoteValue;
28 | ||
29 | /* A vote item */ | |
30 | typedef struct QuorumVoteItem { | |
31 | int index; | |
32 | QLIST_ENTRY(QuorumVoteItem) next; | |
33 | } QuorumVoteItem; | |
34 | ||
35 | /* this structure is a vote version. A version is the set of votes sharing the | |
36 | * same vote value. | |
37 | * The set of votes will be tracked with the items field and its cardinality is | |
38 | * vote_count. | |
39 | */ | |
40 | typedef struct QuorumVoteVersion { | |
41 | QuorumVoteValue value; | |
42 | int index; | |
43 | int vote_count; | |
44 | QLIST_HEAD(, QuorumVoteItem) items; | |
45 | QLIST_ENTRY(QuorumVoteVersion) next; | |
46 | } QuorumVoteVersion; | |
47 | ||
48 | /* this structure holds a group of vote versions together */ | |
49 | typedef struct QuorumVotes { | |
50 | QLIST_HEAD(, QuorumVoteVersion) vote_list; | |
51 | bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b); | |
52 | } QuorumVotes; | |
27cec15e | 53 | |
cadebd7a BC |
54 | /* the following structure holds the state of one quorum instance */ |
55 | typedef struct BDRVQuorumState { | |
56 | BlockDriverState **bs; /* children BlockDriverStates */ | |
57 | int num_children; /* children count */ | |
58 | int threshold; /* if less than threshold children reads gave the | |
59 | * same result a quorum error occurs. | |
60 | */ | |
61 | bool is_blkverify; /* true if the driver is in blkverify mode | |
62 | * Writes are mirrored on two children devices. | |
63 | * On reads the two children devices' contents are | |
64 | * compared and if a difference is spotted its | |
65 | * location is printed and the code aborts. | |
66 | * It is useful to debug other block drivers by | |
67 | * comparing them with a reference one. | |
68 | */ | |
69 | } BDRVQuorumState; | |
70 | ||
27cec15e BC |
71 | typedef struct QuorumAIOCB QuorumAIOCB; |
72 | ||
73 | /* Quorum will create one instance of the following structure per operation it | |
74 | * performs on its children. | |
75 | * So for each read/write operation coming from the upper layer there will be | |
76 | * $children_count QuorumChildRequest. | |
77 | */ | |
78 | typedef struct QuorumChildRequest { | |
79 | BlockDriverAIOCB *aiocb; | |
80 | QEMUIOVector qiov; | |
81 | uint8_t *buf; | |
82 | int ret; | |
83 | QuorumAIOCB *parent; | |
84 | } QuorumChildRequest; | |
85 | ||
86 | /* Quorum will use the following structure to track progress of each read/write | |
87 | * operation received by the upper layer. | |
88 | * This structure hold pointers to the QuorumChildRequest structures instances | |
89 | * used to do operations on each children and track overall progress. | |
90 | */ | |
91 | struct QuorumAIOCB { | |
92 | BlockDriverAIOCB common; | |
93 | ||
94 | /* Request metadata */ | |
95 | uint64_t sector_num; | |
96 | int nb_sectors; | |
97 | ||
98 | QEMUIOVector *qiov; /* calling IOV */ | |
99 | ||
100 | QuorumChildRequest *qcrs; /* individual child requests */ | |
101 | int count; /* number of completed AIOCB */ | |
102 | int success_count; /* number of successfully completed AIOCB */ | |
103 | ||
95c6bff3 BC |
104 | QuorumVotes votes; |
105 | ||
27cec15e BC |
106 | bool is_read; |
107 | int vote_ret; | |
108 | }; | |
cadebd7a | 109 | |
95c6bff3 BC |
110 | static void quorum_vote(QuorumAIOCB *acb); |
111 | ||
13e7956e BC |
112 | static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) |
113 | { | |
114 | QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common); | |
115 | BDRVQuorumState *s = acb->common.bs->opaque; | |
116 | int i; | |
117 | ||
118 | /* cancel all callbacks */ | |
119 | for (i = 0; i < s->num_children; i++) { | |
120 | bdrv_aio_cancel(acb->qcrs[i].aiocb); | |
121 | } | |
122 | ||
123 | g_free(acb->qcrs); | |
124 | qemu_aio_release(acb); | |
125 | } | |
126 | ||
127 | static AIOCBInfo quorum_aiocb_info = { | |
128 | .aiocb_size = sizeof(QuorumAIOCB), | |
129 | .cancel = quorum_aio_cancel, | |
130 | }; | |
131 | ||
132 | static void quorum_aio_finalize(QuorumAIOCB *acb) | |
133 | { | |
7db6982a BC |
134 | BDRVQuorumState *s = acb->common.bs->opaque; |
135 | int i, ret = 0; | |
13e7956e | 136 | |
95c6bff3 BC |
137 | if (acb->vote_ret) { |
138 | ret = acb->vote_ret; | |
139 | } | |
140 | ||
13e7956e BC |
141 | acb->common.cb(acb->common.opaque, ret); |
142 | ||
7db6982a BC |
143 | if (acb->is_read) { |
144 | for (i = 0; i < s->num_children; i++) { | |
145 | qemu_vfree(acb->qcrs[i].buf); | |
146 | qemu_iovec_destroy(&acb->qcrs[i].qiov); | |
147 | } | |
148 | } | |
149 | ||
13e7956e BC |
150 | g_free(acb->qcrs); |
151 | qemu_aio_release(acb); | |
152 | } | |
153 | ||
95c6bff3 BC |
154 | static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) |
155 | { | |
156 | return !memcmp(a->h, b->h, HASH_LENGTH); | |
157 | } | |
158 | ||
159 | static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) | |
160 | { | |
161 | return a->l == b->l; | |
162 | } | |
163 | ||
13e7956e BC |
164 | static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, |
165 | BlockDriverState *bs, | |
166 | QEMUIOVector *qiov, | |
167 | uint64_t sector_num, | |
168 | int nb_sectors, | |
169 | BlockDriverCompletionFunc *cb, | |
170 | void *opaque) | |
171 | { | |
172 | QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque); | |
173 | int i; | |
174 | ||
175 | acb->common.bs->opaque = s; | |
176 | acb->sector_num = sector_num; | |
177 | acb->nb_sectors = nb_sectors; | |
178 | acb->qiov = qiov; | |
179 | acb->qcrs = g_new0(QuorumChildRequest, s->num_children); | |
180 | acb->count = 0; | |
181 | acb->success_count = 0; | |
95c6bff3 BC |
182 | acb->votes.compare = quorum_sha256_compare; |
183 | QLIST_INIT(&acb->votes.vote_list); | |
13e7956e BC |
184 | acb->is_read = false; |
185 | acb->vote_ret = 0; | |
186 | ||
187 | for (i = 0; i < s->num_children; i++) { | |
188 | acb->qcrs[i].buf = NULL; | |
189 | acb->qcrs[i].ret = 0; | |
190 | acb->qcrs[i].parent = acb; | |
191 | } | |
192 | ||
193 | return acb; | |
194 | } | |
195 | ||
95c6bff3 BC |
196 | static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret) |
197 | { | |
198 | QObject *data; | |
199 | assert(node_name); | |
200 | data = qobject_from_jsonf("{ 'ret': %d" | |
201 | ", 'node-name': %s" | |
202 | ", 'sector-num': %" PRId64 | |
203 | ", 'sectors-count': %d }", | |
204 | ret, node_name, acb->sector_num, acb->nb_sectors); | |
205 | monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data); | |
206 | qobject_decref(data); | |
207 | } | |
208 | ||
209 | static void quorum_report_failure(QuorumAIOCB *acb) | |
210 | { | |
211 | QObject *data; | |
212 | const char *reference = acb->common.bs->device_name[0] ? | |
213 | acb->common.bs->device_name : | |
214 | acb->common.bs->node_name; | |
215 | data = qobject_from_jsonf("{ 'reference': %s" | |
216 | ", 'sector-num': %" PRId64 | |
217 | ", 'sectors-count': %d }", | |
218 | reference, acb->sector_num, acb->nb_sectors); | |
219 | monitor_protocol_event(QEVENT_QUORUM_FAILURE, data); | |
220 | qobject_decref(data); | |
221 | } | |
222 | ||
223 | static int quorum_vote_error(QuorumAIOCB *acb); | |
224 | ||
225 | static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) | |
226 | { | |
227 | BDRVQuorumState *s = acb->common.bs->opaque; | |
228 | ||
229 | if (acb->success_count < s->threshold) { | |
230 | acb->vote_ret = quorum_vote_error(acb); | |
231 | quorum_report_failure(acb); | |
232 | return true; | |
233 | } | |
234 | ||
235 | return false; | |
236 | } | |
237 | ||
13e7956e BC |
238 | static void quorum_aio_cb(void *opaque, int ret) |
239 | { | |
240 | QuorumChildRequest *sacb = opaque; | |
241 | QuorumAIOCB *acb = sacb->parent; | |
242 | BDRVQuorumState *s = acb->common.bs->opaque; | |
243 | ||
244 | sacb->ret = ret; | |
245 | acb->count++; | |
246 | if (ret == 0) { | |
247 | acb->success_count++; | |
95c6bff3 BC |
248 | } else { |
249 | quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret); | |
13e7956e BC |
250 | } |
251 | assert(acb->count <= s->num_children); | |
252 | assert(acb->success_count <= s->num_children); | |
253 | if (acb->count < s->num_children) { | |
254 | return; | |
255 | } | |
256 | ||
95c6bff3 BC |
257 | /* Do the vote on read */ |
258 | if (acb->is_read) { | |
259 | quorum_vote(acb); | |
260 | } else { | |
261 | quorum_has_too_much_io_failed(acb); | |
262 | } | |
263 | ||
13e7956e BC |
264 | quorum_aio_finalize(acb); |
265 | } | |
266 | ||
95c6bff3 BC |
267 | static void quorum_report_bad_versions(BDRVQuorumState *s, |
268 | QuorumAIOCB *acb, | |
269 | QuorumVoteValue *value) | |
270 | { | |
271 | QuorumVoteVersion *version; | |
272 | QuorumVoteItem *item; | |
273 | ||
274 | QLIST_FOREACH(version, &acb->votes.vote_list, next) { | |
275 | if (acb->votes.compare(&version->value, value)) { | |
276 | continue; | |
277 | } | |
278 | QLIST_FOREACH(item, &version->items, next) { | |
279 | quorum_report_bad(acb, s->bs[item->index]->node_name, 0); | |
280 | } | |
281 | } | |
282 | } | |
283 | ||
284 | static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) | |
285 | { | |
286 | int i; | |
287 | assert(dest->niov == source->niov); | |
288 | assert(dest->size == source->size); | |
289 | for (i = 0; i < source->niov; i++) { | |
290 | assert(dest->iov[i].iov_len == source->iov[i].iov_len); | |
291 | memcpy(dest->iov[i].iov_base, | |
292 | source->iov[i].iov_base, | |
293 | source->iov[i].iov_len); | |
294 | } | |
295 | } | |
296 | ||
297 | static void quorum_count_vote(QuorumVotes *votes, | |
298 | QuorumVoteValue *value, | |
299 | int index) | |
300 | { | |
301 | QuorumVoteVersion *v = NULL, *version = NULL; | |
302 | QuorumVoteItem *item; | |
303 | ||
304 | /* look if we have something with this hash */ | |
305 | QLIST_FOREACH(v, &votes->vote_list, next) { | |
306 | if (votes->compare(&v->value, value)) { | |
307 | version = v; | |
308 | break; | |
309 | } | |
310 | } | |
311 | ||
312 | /* It's a version not yet in the list add it */ | |
313 | if (!version) { | |
314 | version = g_new0(QuorumVoteVersion, 1); | |
315 | QLIST_INIT(&version->items); | |
316 | memcpy(&version->value, value, sizeof(version->value)); | |
317 | version->index = index; | |
318 | version->vote_count = 0; | |
319 | QLIST_INSERT_HEAD(&votes->vote_list, version, next); | |
320 | } | |
321 | ||
322 | version->vote_count++; | |
323 | ||
324 | item = g_new0(QuorumVoteItem, 1); | |
325 | item->index = index; | |
326 | QLIST_INSERT_HEAD(&version->items, item, next); | |
327 | } | |
328 | ||
329 | static void quorum_free_vote_list(QuorumVotes *votes) | |
330 | { | |
331 | QuorumVoteVersion *version, *next_version; | |
332 | QuorumVoteItem *item, *next_item; | |
333 | ||
334 | QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) { | |
335 | QLIST_REMOVE(version, next); | |
336 | QLIST_FOREACH_SAFE(item, &version->items, next, next_item) { | |
337 | QLIST_REMOVE(item, next); | |
338 | g_free(item); | |
339 | } | |
340 | g_free(version); | |
341 | } | |
342 | } | |
343 | ||
344 | static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash) | |
345 | { | |
346 | int j, ret; | |
347 | gnutls_hash_hd_t dig; | |
348 | QEMUIOVector *qiov = &acb->qcrs[i].qiov; | |
349 | ||
350 | ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256); | |
351 | ||
352 | if (ret < 0) { | |
353 | return ret; | |
354 | } | |
355 | ||
356 | for (j = 0; j < qiov->niov; j++) { | |
357 | ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len); | |
358 | if (ret < 0) { | |
359 | break; | |
360 | } | |
361 | } | |
362 | ||
363 | gnutls_hash_deinit(dig, (void *) hash); | |
364 | return ret; | |
365 | } | |
366 | ||
367 | static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes) | |
368 | { | |
369 | int max = 0; | |
370 | QuorumVoteVersion *candidate, *winner = NULL; | |
371 | ||
372 | QLIST_FOREACH(candidate, &votes->vote_list, next) { | |
373 | if (candidate->vote_count > max) { | |
374 | max = candidate->vote_count; | |
375 | winner = candidate; | |
376 | } | |
377 | } | |
378 | ||
379 | return winner; | |
380 | } | |
381 | ||
382 | /* qemu_iovec_compare is handy for blkverify mode because it returns the first | |
383 | * differing byte location. Yet it is handcoded to compare vectors one byte | |
384 | * after another so it does not benefit from the libc SIMD optimizations. | |
385 | * quorum_iovec_compare is written for speed and should be used in the non | |
386 | * blkverify mode of quorum. | |
387 | */ | |
388 | static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) | |
389 | { | |
390 | int i; | |
391 | int result; | |
392 | ||
393 | assert(a->niov == b->niov); | |
394 | for (i = 0; i < a->niov; i++) { | |
395 | assert(a->iov[i].iov_len == b->iov[i].iov_len); | |
396 | result = memcmp(a->iov[i].iov_base, | |
397 | b->iov[i].iov_base, | |
398 | a->iov[i].iov_len); | |
399 | if (result) { | |
400 | return false; | |
401 | } | |
402 | } | |
403 | ||
404 | return true; | |
405 | } | |
406 | ||
407 | static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb, | |
408 | const char *fmt, ...) | |
409 | { | |
410 | va_list ap; | |
411 | ||
412 | va_start(ap, fmt); | |
413 | fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ", | |
414 | acb->sector_num, acb->nb_sectors); | |
415 | vfprintf(stderr, fmt, ap); | |
416 | fprintf(stderr, "\n"); | |
417 | va_end(ap); | |
418 | exit(1); | |
419 | } | |
420 | ||
421 | static bool quorum_compare(QuorumAIOCB *acb, | |
422 | QEMUIOVector *a, | |
423 | QEMUIOVector *b) | |
424 | { | |
425 | BDRVQuorumState *s = acb->common.bs->opaque; | |
426 | ssize_t offset; | |
427 | ||
428 | /* This driver will replace blkverify in this particular case */ | |
429 | if (s->is_blkverify) { | |
430 | offset = qemu_iovec_compare(a, b); | |
431 | if (offset != -1) { | |
432 | quorum_err(acb, "contents mismatch in sector %" PRId64, | |
433 | acb->sector_num + | |
434 | (uint64_t)(offset / BDRV_SECTOR_SIZE)); | |
435 | } | |
436 | return true; | |
437 | } | |
438 | ||
439 | return quorum_iovec_compare(a, b); | |
440 | } | |
441 | ||
442 | /* Do a vote to get the error code */ | |
443 | static int quorum_vote_error(QuorumAIOCB *acb) | |
444 | { | |
445 | BDRVQuorumState *s = acb->common.bs->opaque; | |
446 | QuorumVoteVersion *winner = NULL; | |
447 | QuorumVotes error_votes; | |
448 | QuorumVoteValue result_value; | |
449 | int i, ret = 0; | |
450 | bool error = false; | |
451 | ||
452 | QLIST_INIT(&error_votes.vote_list); | |
453 | error_votes.compare = quorum_64bits_compare; | |
454 | ||
455 | for (i = 0; i < s->num_children; i++) { | |
456 | ret = acb->qcrs[i].ret; | |
457 | if (ret) { | |
458 | error = true; | |
459 | result_value.l = ret; | |
460 | quorum_count_vote(&error_votes, &result_value, i); | |
461 | } | |
462 | } | |
463 | ||
464 | if (error) { | |
465 | winner = quorum_get_vote_winner(&error_votes); | |
466 | ret = winner->value.l; | |
467 | } | |
468 | ||
469 | quorum_free_vote_list(&error_votes); | |
470 | ||
471 | return ret; | |
472 | } | |
473 | ||
474 | static void quorum_vote(QuorumAIOCB *acb) | |
475 | { | |
476 | bool quorum = true; | |
477 | int i, j, ret; | |
478 | QuorumVoteValue hash; | |
479 | BDRVQuorumState *s = acb->common.bs->opaque; | |
480 | QuorumVoteVersion *winner; | |
481 | ||
482 | if (quorum_has_too_much_io_failed(acb)) { | |
483 | return; | |
484 | } | |
485 | ||
486 | /* get the index of the first successful read */ | |
487 | for (i = 0; i < s->num_children; i++) { | |
488 | if (!acb->qcrs[i].ret) { | |
489 | break; | |
490 | } | |
491 | } | |
492 | ||
493 | assert(i < s->num_children); | |
494 | ||
495 | /* compare this read with all other successful reads stopping at quorum | |
496 | * failure | |
497 | */ | |
498 | for (j = i + 1; j < s->num_children; j++) { | |
499 | if (acb->qcrs[j].ret) { | |
500 | continue; | |
501 | } | |
502 | quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov); | |
503 | if (!quorum) { | |
504 | break; | |
505 | } | |
506 | } | |
507 | ||
508 | /* Every successful read agrees */ | |
509 | if (quorum) { | |
510 | quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); | |
511 | return; | |
512 | } | |
513 | ||
514 | /* compute hashes for each successful read, also store indexes */ | |
515 | for (i = 0; i < s->num_children; i++) { | |
516 | if (acb->qcrs[i].ret) { | |
517 | continue; | |
518 | } | |
519 | ret = quorum_compute_hash(acb, i, &hash); | |
520 | /* if ever the hash computation failed */ | |
521 | if (ret < 0) { | |
522 | acb->vote_ret = ret; | |
523 | goto free_exit; | |
524 | } | |
525 | quorum_count_vote(&acb->votes, &hash, i); | |
526 | } | |
527 | ||
528 | /* vote to select the most represented version */ | |
529 | winner = quorum_get_vote_winner(&acb->votes); | |
530 | ||
531 | /* if the winner count is smaller than threshold the read fails */ | |
532 | if (winner->vote_count < s->threshold) { | |
533 | quorum_report_failure(acb); | |
534 | acb->vote_ret = -EIO; | |
535 | goto free_exit; | |
536 | } | |
537 | ||
538 | /* we have a winner: copy it */ | |
539 | quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov); | |
540 | ||
541 | /* some versions are bad print them */ | |
542 | quorum_report_bad_versions(s, acb, &winner->value); | |
543 | ||
544 | free_exit: | |
545 | /* free lists */ | |
546 | quorum_free_vote_list(&acb->votes); | |
547 | } | |
548 | ||
7db6982a BC |
549 | static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs, |
550 | int64_t sector_num, | |
551 | QEMUIOVector *qiov, | |
552 | int nb_sectors, | |
553 | BlockDriverCompletionFunc *cb, | |
554 | void *opaque) | |
555 | { | |
556 | BDRVQuorumState *s = bs->opaque; | |
557 | QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, | |
558 | nb_sectors, cb, opaque); | |
559 | int i; | |
560 | ||
561 | acb->is_read = true; | |
562 | ||
563 | for (i = 0; i < s->num_children; i++) { | |
564 | acb->qcrs[i].buf = qemu_blockalign(s->bs[i], qiov->size); | |
565 | qemu_iovec_init(&acb->qcrs[i].qiov, qiov->niov); | |
566 | qemu_iovec_clone(&acb->qcrs[i].qiov, qiov, acb->qcrs[i].buf); | |
567 | } | |
568 | ||
569 | for (i = 0; i < s->num_children; i++) { | |
95c6bff3 | 570 | bdrv_aio_readv(s->bs[i], sector_num, &acb->qcrs[i].qiov, nb_sectors, |
7db6982a BC |
571 | quorum_aio_cb, &acb->qcrs[i]); |
572 | } | |
573 | ||
574 | return &acb->common; | |
575 | } | |
576 | ||
13e7956e BC |
577 | static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs, |
578 | int64_t sector_num, | |
579 | QEMUIOVector *qiov, | |
580 | int nb_sectors, | |
581 | BlockDriverCompletionFunc *cb, | |
582 | void *opaque) | |
583 | { | |
584 | BDRVQuorumState *s = bs->opaque; | |
585 | QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, | |
586 | cb, opaque); | |
587 | int i; | |
588 | ||
589 | for (i = 0; i < s->num_children; i++) { | |
590 | acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov, | |
591 | nb_sectors, &quorum_aio_cb, | |
592 | &acb->qcrs[i]); | |
593 | } | |
594 | ||
595 | return &acb->common; | |
596 | } | |
597 | ||
d55dee20 BC |
598 | static int64_t quorum_getlength(BlockDriverState *bs) |
599 | { | |
600 | BDRVQuorumState *s = bs->opaque; | |
601 | int64_t result; | |
602 | int i; | |
603 | ||
604 | /* check that all file have the same length */ | |
605 | result = bdrv_getlength(s->bs[0]); | |
606 | if (result < 0) { | |
607 | return result; | |
608 | } | |
609 | for (i = 1; i < s->num_children; i++) { | |
610 | int64_t value = bdrv_getlength(s->bs[i]); | |
611 | if (value < 0) { | |
612 | return value; | |
613 | } | |
614 | if (value != result) { | |
615 | return -EIO; | |
616 | } | |
617 | } | |
618 | ||
619 | return result; | |
620 | } | |
621 | ||
a28e4c40 BC |
622 | static void quorum_invalidate_cache(BlockDriverState *bs) |
623 | { | |
624 | BDRVQuorumState *s = bs->opaque; | |
625 | int i; | |
626 | ||
627 | for (i = 0; i < s->num_children; i++) { | |
628 | bdrv_invalidate_cache(s->bs[i]); | |
629 | } | |
630 | } | |
631 | ||
cadebd7a BC |
632 | static BlockDriver bdrv_quorum = { |
633 | .format_name = "quorum", | |
634 | .protocol_name = "quorum", | |
635 | ||
636 | .instance_size = sizeof(BDRVQuorumState), | |
13e7956e | 637 | |
d55dee20 BC |
638 | .bdrv_getlength = quorum_getlength, |
639 | ||
7db6982a | 640 | .bdrv_aio_readv = quorum_aio_readv, |
13e7956e | 641 | .bdrv_aio_writev = quorum_aio_writev, |
a28e4c40 | 642 | .bdrv_invalidate_cache = quorum_invalidate_cache, |
cadebd7a BC |
643 | }; |
644 | ||
645 | static void bdrv_quorum_init(void) | |
646 | { | |
647 | bdrv_register(&bdrv_quorum); | |
648 | } | |
649 | ||
650 | block_init(bdrv_quorum_init); |