]>
Commit | Line | Data |
---|---|---|
298800ca SH |
1 | /* |
2 | * QEMU Enhanced Disk Format Table I/O | |
3 | * | |
4 | * Copyright IBM, Corp. 2010 | |
5 | * | |
6 | * Authors: | |
7 | * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> | |
8 | * Anthony Liguori <aliguori@us.ibm.com> | |
9 | * | |
10 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | |
11 | * See the COPYING.LIB file in the top-level directory. | |
12 | * | |
13 | */ | |
14 | ||
80c71a24 | 15 | #include "qemu/osdep.h" |
298800ca | 16 | #include "trace.h" |
1de7afc9 | 17 | #include "qemu/sockets.h" /* for EINPROGRESS on Windows */ |
298800ca | 18 | #include "qed.h" |
58369e22 | 19 | #include "qemu/bswap.h" |
298800ca SH |
20 | |
21 | typedef struct { | |
22 | GenericCB gencb; | |
23 | BDRVQEDState *s; | |
24 | QEDTable *table; | |
25 | ||
26 | struct iovec iov; | |
27 | QEMUIOVector qiov; | |
28 | } QEDReadTableCB; | |
29 | ||
30 | static void qed_read_table_cb(void *opaque, int ret) | |
31 | { | |
32 | QEDReadTableCB *read_table_cb = opaque; | |
33 | QEDTable *table = read_table_cb->table; | |
b9e413dd | 34 | BDRVQEDState *s = read_table_cb->s; |
23e9a39e | 35 | int noffsets = read_table_cb->qiov.size / sizeof(uint64_t); |
298800ca SH |
36 | int i; |
37 | ||
38 | /* Handle I/O error */ | |
39 | if (ret) { | |
40 | goto out; | |
41 | } | |
42 | ||
43 | /* Byteswap offsets */ | |
b9e413dd | 44 | qed_acquire(s); |
298800ca SH |
45 | for (i = 0; i < noffsets; i++) { |
46 | table->offsets[i] = le64_to_cpu(table->offsets[i]); | |
47 | } | |
b9e413dd | 48 | qed_release(s); |
298800ca SH |
49 | |
50 | out: | |
51 | /* Completion */ | |
b9e413dd | 52 | trace_qed_read_table_cb(s, read_table_cb->table, ret); |
298800ca SH |
53 | gencb_complete(&read_table_cb->gencb, ret); |
54 | } | |
55 | ||
56 | static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, | |
097310b5 | 57 | BlockCompletionFunc *cb, void *opaque) |
298800ca SH |
58 | { |
59 | QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb), | |
60 | cb, opaque); | |
61 | QEMUIOVector *qiov = &read_table_cb->qiov; | |
298800ca SH |
62 | |
63 | trace_qed_read_table(s, offset, table); | |
64 | ||
65 | read_table_cb->s = s; | |
66 | read_table_cb->table = table; | |
67 | read_table_cb->iov.iov_base = table->offsets, | |
68 | read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size, | |
69 | ||
70 | qemu_iovec_init_external(qiov, &read_table_cb->iov, 1); | |
ebb7af21 | 71 | bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov, |
ad54ae80 PB |
72 | qiov->size / BDRV_SECTOR_SIZE, |
73 | qed_read_table_cb, read_table_cb); | |
298800ca SH |
74 | } |
75 | ||
76 | typedef struct { | |
77 | GenericCB gencb; | |
78 | BDRVQEDState *s; | |
79 | QEDTable *orig_table; | |
80 | QEDTable *table; | |
81 | bool flush; /* flush after write? */ | |
82 | ||
83 | struct iovec iov; | |
84 | QEMUIOVector qiov; | |
85 | } QEDWriteTableCB; | |
86 | ||
87 | static void qed_write_table_cb(void *opaque, int ret) | |
88 | { | |
89 | QEDWriteTableCB *write_table_cb = opaque; | |
b9e413dd | 90 | BDRVQEDState *s = write_table_cb->s; |
298800ca | 91 | |
b9e413dd | 92 | trace_qed_write_table_cb(s, |
298800ca SH |
93 | write_table_cb->orig_table, |
94 | write_table_cb->flush, | |
95 | ret); | |
96 | ||
97 | if (ret) { | |
98 | goto out; | |
99 | } | |
100 | ||
101 | if (write_table_cb->flush) { | |
102 | /* We still need to flush first */ | |
103 | write_table_cb->flush = false; | |
b9e413dd | 104 | qed_acquire(s); |
298800ca SH |
105 | bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb, |
106 | write_table_cb); | |
b9e413dd | 107 | qed_release(s); |
298800ca SH |
108 | return; |
109 | } | |
110 | ||
111 | out: | |
112 | qemu_vfree(write_table_cb->table); | |
113 | gencb_complete(&write_table_cb->gencb, ret); | |
298800ca SH |
114 | } |
115 | ||
116 | /** | |
117 | * Write out an updated part or all of a table | |
118 | * | |
119 | * @s: QED state | |
120 | * @offset: Offset of table in image file, in bytes | |
121 | * @table: Table | |
122 | * @index: Index of first element | |
123 | * @n: Number of elements | |
124 | * @flush: Whether or not to sync to disk | |
125 | * @cb: Completion function | |
126 | * @opaque: Argument for completion function | |
127 | */ | |
128 | static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, | |
129 | unsigned int index, unsigned int n, bool flush, | |
097310b5 | 130 | BlockCompletionFunc *cb, void *opaque) |
298800ca SH |
131 | { |
132 | QEDWriteTableCB *write_table_cb; | |
298800ca SH |
133 | unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; |
134 | unsigned int start, end, i; | |
135 | size_t len_bytes; | |
136 | ||
137 | trace_qed_write_table(s, offset, table, index, n); | |
138 | ||
139 | /* Calculate indices of the first and one after last elements */ | |
140 | start = index & ~sector_mask; | |
141 | end = (index + n + sector_mask) & ~sector_mask; | |
142 | ||
143 | len_bytes = (end - start) * sizeof(uint64_t); | |
144 | ||
145 | write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque); | |
146 | write_table_cb->s = s; | |
147 | write_table_cb->orig_table = table; | |
148 | write_table_cb->flush = flush; | |
149 | write_table_cb->table = qemu_blockalign(s->bs, len_bytes); | |
150 | write_table_cb->iov.iov_base = write_table_cb->table->offsets; | |
151 | write_table_cb->iov.iov_len = len_bytes; | |
152 | qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1); | |
153 | ||
154 | /* Byteswap table */ | |
155 | for (i = start; i < end; i++) { | |
156 | uint64_t le_offset = cpu_to_le64(table->offsets[i]); | |
157 | write_table_cb->table->offsets[i - start] = le_offset; | |
158 | } | |
159 | ||
160 | /* Adjust for offset into table */ | |
161 | offset += start * sizeof(uint64_t); | |
162 | ||
0d1049c7 | 163 | bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, |
ad54ae80 PB |
164 | &write_table_cb->qiov, |
165 | write_table_cb->qiov.size / BDRV_SECTOR_SIZE, | |
166 | qed_write_table_cb, write_table_cb); | |
298800ca SH |
167 | } |
168 | ||
/**
 * Propagate return value from async callback
 *
 * @opaque:     Pointer to the int that receives the result
 * @ret:        Value to store
 */
static void qed_sync_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
176 | ||
177 | int qed_read_l1_table_sync(BDRVQEDState *s) | |
178 | { | |
179 | int ret = -EINPROGRESS; | |
180 | ||
298800ca SH |
181 | qed_read_table(s, s->header.l1_table_offset, |
182 | s->l1_table, qed_sync_cb, &ret); | |
88b062c2 | 183 | BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); |
298800ca | 184 | |
298800ca SH |
185 | return ret; |
186 | } | |
187 | ||
188 | void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, | |
097310b5 | 189 | BlockCompletionFunc *cb, void *opaque) |
298800ca SH |
190 | { |
191 | BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE); | |
192 | qed_write_table(s, s->header.l1_table_offset, | |
193 | s->l1_table, index, n, false, cb, opaque); | |
194 | } | |
195 | ||
196 | int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, | |
197 | unsigned int n) | |
198 | { | |
199 | int ret = -EINPROGRESS; | |
200 | ||
298800ca | 201 | qed_write_l1_table(s, index, n, qed_sync_cb, &ret); |
88b062c2 | 202 | BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); |
298800ca | 203 | |
298800ca SH |
204 | return ret; |
205 | } | |
206 | ||
207 | typedef struct { | |
208 | GenericCB gencb; | |
209 | BDRVQEDState *s; | |
210 | uint64_t l2_offset; | |
211 | QEDRequest *request; | |
212 | } QEDReadL2TableCB; | |
213 | ||
214 | static void qed_read_l2_table_cb(void *opaque, int ret) | |
215 | { | |
216 | QEDReadL2TableCB *read_l2_table_cb = opaque; | |
217 | QEDRequest *request = read_l2_table_cb->request; | |
218 | BDRVQEDState *s = read_l2_table_cb->s; | |
219 | CachedL2Table *l2_table = request->l2_table; | |
e4fc8781 | 220 | uint64_t l2_offset = read_l2_table_cb->l2_offset; |
298800ca | 221 | |
b9e413dd | 222 | qed_acquire(s); |
298800ca SH |
223 | if (ret) { |
224 | /* can't trust loaded L2 table anymore */ | |
225 | qed_unref_l2_cache_entry(l2_table); | |
226 | request->l2_table = NULL; | |
227 | } else { | |
e4fc8781 | 228 | l2_table->offset = l2_offset; |
298800ca SH |
229 | |
230 | qed_commit_l2_cache_entry(&s->l2_cache, l2_table); | |
231 | ||
232 | /* This is guaranteed to succeed because we just committed the entry | |
233 | * to the cache. | |
234 | */ | |
e4fc8781 | 235 | request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); |
298800ca SH |
236 | assert(request->l2_table != NULL); |
237 | } | |
b9e413dd | 238 | qed_release(s); |
298800ca SH |
239 | |
240 | gencb_complete(&read_l2_table_cb->gencb, ret); | |
241 | } | |
242 | ||
243 | void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, | |
097310b5 | 244 | BlockCompletionFunc *cb, void *opaque) |
298800ca SH |
245 | { |
246 | QEDReadL2TableCB *read_l2_table_cb; | |
247 | ||
248 | qed_unref_l2_cache_entry(request->l2_table); | |
249 | ||
250 | /* Check for cached L2 entry */ | |
251 | request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); | |
252 | if (request->l2_table) { | |
253 | cb(opaque, 0); | |
254 | return; | |
255 | } | |
256 | ||
257 | request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); | |
258 | request->l2_table->table = qed_alloc_table(s); | |
259 | ||
260 | read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque); | |
261 | read_l2_table_cb->s = s; | |
262 | read_l2_table_cb->l2_offset = offset; | |
263 | read_l2_table_cb->request = request; | |
264 | ||
265 | BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD); | |
266 | qed_read_table(s, offset, request->l2_table->table, | |
267 | qed_read_l2_table_cb, read_l2_table_cb); | |
268 | } | |
269 | ||
270 | int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset) | |
271 | { | |
272 | int ret = -EINPROGRESS; | |
273 | ||
298800ca | 274 | qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); |
88b062c2 | 275 | BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); |
298800ca | 276 | |
298800ca SH |
277 | return ret; |
278 | } | |
279 | ||
280 | void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, | |
281 | unsigned int index, unsigned int n, bool flush, | |
097310b5 | 282 | BlockCompletionFunc *cb, void *opaque) |
298800ca SH |
283 | { |
284 | BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE); | |
285 | qed_write_table(s, request->l2_table->offset, | |
286 | request->l2_table->table, index, n, flush, cb, opaque); | |
287 | } | |
288 | ||
289 | int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, | |
290 | unsigned int index, unsigned int n, bool flush) | |
291 | { | |
292 | int ret = -EINPROGRESS; | |
293 | ||
298800ca | 294 | qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); |
88b062c2 | 295 | BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); |
298800ca | 296 | |
298800ca SH |
297 | return ret; |
298 | } |