Coroutine *co;
/* Request metadata */
- uint64_t sector_num;
- int nb_sectors;
+ uint64_t offset;
+ uint64_t bytes;
QEMUIOVector *qiov; /* calling IOV */
static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
- uint64_t sector_num,
- int nb_sectors)
+ uint64_t offset,
+ uint64_t bytes)
{
BDRVQuorumState *s = bs->opaque;
QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
int i;
- acb->co = qemu_coroutine_self();
- acb->bs = bs;
- acb->sector_num = sector_num;
- acb->nb_sectors = nb_sectors;
- acb->qiov = qiov;
- acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
- acb->count = 0;
- acb->success_count = 0;
- acb->rewrite_count = 0;
- acb->votes.compare = quorum_sha256_compare;
- QLIST_INIT(&acb->votes.vote_list);
- acb->is_read = false;
- acb->vote_ret = 0;
+ *acb = (QuorumAIOCB) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov = qiov,
+ .votes.compare = quorum_sha256_compare,
+ .votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
+ };
+ acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
for (i = 0; i < s->num_children; i++) {
acb->qcrs[i].buf = NULL;
acb->qcrs[i].ret = 0;
return acb;
}
-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
- int nb_sectors, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t offset,
+ uint64_t bytes, char *node_name, int ret)
{
const char *msg = NULL;
+ int64_t start_sector = offset / BDRV_SECTOR_SIZE;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
- sector_num, nb_sectors, &error_abort);
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector,
+ end_sector - start_sector, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
const char *reference = bdrv_get_device_or_node_name(acb->bs);
- qapi_event_send_quorum_failure(reference, acb->sector_num,
- acb->nb_sectors, &error_abort);
+ int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE;
+ int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes,
+ BDRV_SECTOR_SIZE);
+
+ qapi_event_send_quorum_failure(reference, start_sector,
+ end_sector - start_sector, &error_abort);
}
static int quorum_vote_error(QuorumAIOCB *acb);
{
QuorumAIOCB *acb = sacb->parent;
QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
- quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
- sacb->bs->node_name, ret);
-}
-
-static int quorum_fifo_aio_cb(void *opaque, int ret)
-{
- QuorumChildRequest *sacb = opaque;
- QuorumAIOCB *acb = sacb->parent;
- BDRVQuorumState *s = acb->bs->opaque;
-
- assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO);
-
- if (ret < 0) {
- quorum_report_bad_acb(sacb, ret);
-
- /* We try to read next child in FIFO order if we fail to read */
- if (acb->children_read < s->num_children) {
- return read_fifo_child(acb);
- }
- }
-
- acb->vote_ret = ret;
-
- /* FIXME: rewrite failed children if acb->children_read > 1? */
-
- return ret;
+ quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret);
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
- acb->nb_sectors,
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes,
s->children[item->index]->bs->node_name, 0);
}
}
/* Ignore any errors, it's just a correction attempt for already
* corrupted data. */
- bdrv_co_pwritev(s->children[co->idx],
- acb->sector_num * BDRV_SECTOR_SIZE,
- acb->nb_sectors * BDRV_SECTOR_SIZE,
+ bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
acb->qiov, 0);
/* Wake up the caller after the last rewrite */
va_list ap;
va_start(ap, fmt);
- fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
- acb->sector_num, acb->nb_sectors);
+ fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ",
+ acb->offset, acb->bytes);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
if (s->is_blkverify) {
offset = qemu_iovec_compare(a, b);
if (offset != -1) {
- quorum_err(acb, "contents mismatch in sector %" PRId64,
- acb->sector_num +
- (uint64_t)(offset / BDRV_SECTOR_SIZE));
+ quorum_err(acb, "contents mismatch at offset %" PRIu64,
+ acb->offset + offset);
}
return true;
}
QuorumChildRequest *sacb = &acb->qcrs[i];
sacb->bs = s->children[i]->bs;
- sacb->ret = bdrv_co_preadv(s->children[i],
- acb->sector_num * BDRV_SECTOR_SIZE,
- acb->nb_sectors * BDRV_SECTOR_SIZE,
+ sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes,
&acb->qcrs[i].qiov, 0);
if (sacb->ret == 0) {
static int read_fifo_child(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->bs->opaque;
- int n = acb->children_read++;
- int ret;
+ int n, ret;
+
+ /* We try to read the next child in FIFO order if we failed to read */
+ do {
+ n = acb->children_read++;
+ acb->qcrs[n].bs = s->children[n]->bs;
+ ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes,
+ acb->qiov, 0);
+ if (ret < 0) {
+ quorum_report_bad_acb(&acb->qcrs[n], ret);
+ }
+ } while (ret < 0 && acb->children_read < s->num_children);
- acb->qcrs[n].bs = s->children[n]->bs;
- ret = bdrv_co_preadv(s->children[n], acb->sector_num * BDRV_SECTOR_SIZE,
- acb->nb_sectors * BDRV_SECTOR_SIZE, acb->qiov, 0);
- ret = quorum_fifo_aio_cb(&acb->qcrs[n], ret);
+ /* FIXME: rewrite failed children if acb->children_read > 1? */
return ret;
}
-static int quorum_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov)
+static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
int ret;
acb->is_read = true;
QuorumChildRequest *sacb = &acb->qcrs[i];
sacb->bs = s->children[i]->bs;
- sacb->ret = bdrv_co_pwritev(s->children[i],
- acb->sector_num * BDRV_SECTOR_SIZE,
- acb->nb_sectors * BDRV_SECTOR_SIZE,
+ sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
acb->qiov, 0);
if (sacb->ret == 0) {
acb->success_count++;
}
}
-static int quorum_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov)
+static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
BDRVQuorumState *s = bs->opaque;
- QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors);
+ QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
int i, ret;
for (i = 0; i < s->num_children; i++) {
result = bdrv_co_flush(s->children[i]->bs);
if (result) {
quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
- bdrv_nb_sectors(s->children[i]->bs),
+ bdrv_getlength(s->children[i]->bs),
s->children[i]->bs->node_name, result);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
/* We can safely add the child now */
bdrv_ref(child_bs);
- child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+
+ child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp);
+ if (child == NULL) {
+ s->next_child_index--;
+ bdrv_unref(child_bs);
+ goto out;
+ }
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
+out:
bdrv_drained_end(bs);
}
.bdrv_getlength = quorum_getlength,
- .bdrv_co_readv = quorum_co_readv,
- .bdrv_co_writev = quorum_co_writev,
+ .bdrv_co_preadv = quorum_co_preadv,
+ .bdrv_co_pwritev = quorum_co_pwritev,
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
+ .bdrv_child_perm = bdrv_filter_default_perms,
+
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};