]>
Commit | Line | Data |
---|---|---|
27cec15e BC |
1 | /* |
2 | * Quorum Block filter | |
3 | * | |
4 | * Copyright (C) 2012-2014 Nodalink, EURL. | |
5 | * | |
6 | * Author: | |
7 | * Benoît Canet <benoit.canet@irqsave.net> | |
8 | * | |
9 | * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp) | |
10 | * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc). | |
11 | * | |
12 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
13 | * See the COPYING file in the top-level directory. | |
14 | */ | |
15 | ||
16 | #include "block/block_int.h" | |
17 | ||
cadebd7a BC |
18 | /* the following structure holds the state of one quorum instance */ |
19 | typedef struct BDRVQuorumState { | |
20 | BlockDriverState **bs; /* children BlockDriverStates */ | |
21 | int num_children; /* children count */ | |
22 | int threshold; /* if less than threshold children reads gave the | |
23 | * same result a quorum error occurs. | |
24 | */ | |
25 | bool is_blkverify; /* true if the driver is in blkverify mode | |
26 | * Writes are mirrored on two children devices. | |
27 | * On reads the two children devices' contents are | |
28 | * compared and if a difference is spotted its | |
29 | * location is printed and the code aborts. | |
30 | * It is useful to debug other block drivers by | |
31 | * comparing them with a reference one. | |
32 | */ | |
33 | } BDRVQuorumState; | |
34 | ||
27cec15e BC |
35 | typedef struct QuorumAIOCB QuorumAIOCB; |
36 | ||
37 | /* Quorum will create one instance of the following structure per operation it | |
38 | * performs on its children. | |
39 | * So for each read/write operation coming from the upper layer there will be | |
40 | * $children_count QuorumChildRequest. | |
41 | */ | |
42 | typedef struct QuorumChildRequest { | |
43 | BlockDriverAIOCB *aiocb; | |
44 | QEMUIOVector qiov; | |
45 | uint8_t *buf; | |
46 | int ret; | |
47 | QuorumAIOCB *parent; | |
48 | } QuorumChildRequest; | |
49 | ||
50 | /* Quorum will use the following structure to track progress of each read/write | |
51 | * operation received by the upper layer. | |
52 | * This structure hold pointers to the QuorumChildRequest structures instances | |
53 | * used to do operations on each children and track overall progress. | |
54 | */ | |
55 | struct QuorumAIOCB { | |
56 | BlockDriverAIOCB common; | |
57 | ||
58 | /* Request metadata */ | |
59 | uint64_t sector_num; | |
60 | int nb_sectors; | |
61 | ||
62 | QEMUIOVector *qiov; /* calling IOV */ | |
63 | ||
64 | QuorumChildRequest *qcrs; /* individual child requests */ | |
65 | int count; /* number of completed AIOCB */ | |
66 | int success_count; /* number of successfully completed AIOCB */ | |
67 | ||
68 | bool is_read; | |
69 | int vote_ret; | |
70 | }; | |
cadebd7a | 71 | |
13e7956e BC |
72 | static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) |
73 | { | |
74 | QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common); | |
75 | BDRVQuorumState *s = acb->common.bs->opaque; | |
76 | int i; | |
77 | ||
78 | /* cancel all callbacks */ | |
79 | for (i = 0; i < s->num_children; i++) { | |
80 | bdrv_aio_cancel(acb->qcrs[i].aiocb); | |
81 | } | |
82 | ||
83 | g_free(acb->qcrs); | |
84 | qemu_aio_release(acb); | |
85 | } | |
86 | ||
87 | static AIOCBInfo quorum_aiocb_info = { | |
88 | .aiocb_size = sizeof(QuorumAIOCB), | |
89 | .cancel = quorum_aio_cancel, | |
90 | }; | |
91 | ||
92 | static void quorum_aio_finalize(QuorumAIOCB *acb) | |
93 | { | |
94 | int ret = 0; | |
95 | ||
96 | acb->common.cb(acb->common.opaque, ret); | |
97 | ||
98 | g_free(acb->qcrs); | |
99 | qemu_aio_release(acb); | |
100 | } | |
101 | ||
102 | static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, | |
103 | BlockDriverState *bs, | |
104 | QEMUIOVector *qiov, | |
105 | uint64_t sector_num, | |
106 | int nb_sectors, | |
107 | BlockDriverCompletionFunc *cb, | |
108 | void *opaque) | |
109 | { | |
110 | QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque); | |
111 | int i; | |
112 | ||
113 | acb->common.bs->opaque = s; | |
114 | acb->sector_num = sector_num; | |
115 | acb->nb_sectors = nb_sectors; | |
116 | acb->qiov = qiov; | |
117 | acb->qcrs = g_new0(QuorumChildRequest, s->num_children); | |
118 | acb->count = 0; | |
119 | acb->success_count = 0; | |
120 | acb->is_read = false; | |
121 | acb->vote_ret = 0; | |
122 | ||
123 | for (i = 0; i < s->num_children; i++) { | |
124 | acb->qcrs[i].buf = NULL; | |
125 | acb->qcrs[i].ret = 0; | |
126 | acb->qcrs[i].parent = acb; | |
127 | } | |
128 | ||
129 | return acb; | |
130 | } | |
131 | ||
132 | static void quorum_aio_cb(void *opaque, int ret) | |
133 | { | |
134 | QuorumChildRequest *sacb = opaque; | |
135 | QuorumAIOCB *acb = sacb->parent; | |
136 | BDRVQuorumState *s = acb->common.bs->opaque; | |
137 | ||
138 | sacb->ret = ret; | |
139 | acb->count++; | |
140 | if (ret == 0) { | |
141 | acb->success_count++; | |
142 | } | |
143 | assert(acb->count <= s->num_children); | |
144 | assert(acb->success_count <= s->num_children); | |
145 | if (acb->count < s->num_children) { | |
146 | return; | |
147 | } | |
148 | ||
149 | quorum_aio_finalize(acb); | |
150 | } | |
151 | ||
152 | static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs, | |
153 | int64_t sector_num, | |
154 | QEMUIOVector *qiov, | |
155 | int nb_sectors, | |
156 | BlockDriverCompletionFunc *cb, | |
157 | void *opaque) | |
158 | { | |
159 | BDRVQuorumState *s = bs->opaque; | |
160 | QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, | |
161 | cb, opaque); | |
162 | int i; | |
163 | ||
164 | for (i = 0; i < s->num_children; i++) { | |
165 | acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov, | |
166 | nb_sectors, &quorum_aio_cb, | |
167 | &acb->qcrs[i]); | |
168 | } | |
169 | ||
170 | return &acb->common; | |
171 | } | |
172 | ||
cadebd7a BC |
173 | static BlockDriver bdrv_quorum = { |
174 | .format_name = "quorum", | |
175 | .protocol_name = "quorum", | |
176 | ||
177 | .instance_size = sizeof(BDRVQuorumState), | |
13e7956e BC |
178 | |
179 | .bdrv_aio_writev = quorum_aio_writev, | |
cadebd7a BC |
180 | }; |
181 | ||
182 | static void bdrv_quorum_init(void) | |
183 | { | |
184 | bdrv_register(&bdrv_quorum); | |
185 | } | |
186 | ||
187 | block_init(bdrv_quorum_init); |