/*
 * Copyright (c) 2018  Citrix Systems Inc.
 * (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
#include "hw/xen/xen.h"
#include "hw/block/xen_blkif.h"
#include "hw/xen/interface/io/ring.h"
#include "sysemu/block-backend.h"
#include "sysemu/iothread.h"
#include "xen-block.h"

typedef struct XenBlockRequest {
    blkif_request_t req;
    int16_t status;
    off_t start;
    QEMUIOVector v;
    void *buf;
    size_t size;
    int presync;
    int aio_inflight;
    int aio_errors;
    XenBlockDataPlane *dataplane;
    QLIST_ENTRY(XenBlockRequest) list;
    BlockAcctCookie acct;
} XenBlockRequest;

struct XenBlockDataPlane {
    XenDevice *xendev;
    XenEventChannel *event_channel;
    unsigned int *ring_ref;
    unsigned int nr_ring_ref;
    void *sring;
    int protocol;
    blkif_back_rings_t rings;
    int more_work;
    QLIST_HEAD(inflight_head, XenBlockRequest) inflight;
    QLIST_HEAD(freelist_head, XenBlockRequest) freelist;
    int requests_total;
    int requests_inflight;
    unsigned int max_requests;
    BlockBackend *blk;
    unsigned int sector_size;
    QEMUBH *bh;
    IOThread *iothread;
    AioContext *ctx;
};

static int xen_block_send_response(XenBlockRequest *request);

static void reset_request(XenBlockRequest *request)
{
    memset(&request->req, 0, sizeof(request->req));
    request->status = 0;
    request->start = 0;
    request->size = 0;
    request->presync = 0;

    request->aio_inflight = 0;
    request->aio_errors = 0;

    request->dataplane = NULL;
    memset(&request->list, 0, sizeof(request->list));
    memset(&request->acct, 0, sizeof(request->acct));

    qemu_iovec_reset(&request->v);
}

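/*
 * Take a request from the freelist, or allocate a new one (together with
 * its page-aligned bounce buffer) if the freelist is empty and fewer than
 * max_requests have been created. Returns NULL once the ring is saturated.
 */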
static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
{
    XenBlockRequest *request = NULL;

    if (QLIST_EMPTY(&dataplane->freelist)) {
        if (dataplane->requests_total >= dataplane->max_requests) {
            goto out;
        }
        /* allocate new struct */
        request = g_malloc0(sizeof(*request));
        request->dataplane = dataplane;
        /*
         * A request can never need more pages than this, and since
         * requests are re-used, allocate the memory once here. It is
         * freed in xen_block_dataplane_destroy() when the request list
         * is freed.
         */
        request->buf = qemu_memalign(XEN_PAGE_SIZE,
                                     BLKIF_MAX_SEGMENTS_PER_REQUEST *
                                     XEN_PAGE_SIZE);
        dataplane->requests_total++;
        qemu_iovec_init(&request->v, 1);
    } else {
        /* get one from freelist */
        request = QLIST_FIRST(&dataplane->freelist);
        QLIST_REMOVE(request, list);
    }
    QLIST_INSERT_HEAD(&dataplane->inflight, request, list);
    dataplane->requests_inflight++;

out:
    return request;
}

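/*
 * Push the response for a finished request onto the ring, notify the
 * frontend's event channel if needed, and recycle the request back onto
 * the freelist.
 */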
static void xen_block_complete_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;

    if (xen_block_send_response(request)) {
        Error *local_err = NULL;

        xen_device_notify_event_channel(dataplane->xendev,
                                        dataplane->event_channel,
                                        &local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }

    QLIST_REMOVE(request, list);
    dataplane->requests_inflight--;
    reset_request(request);
    request->dataplane = dataplane;
    QLIST_INSERT_HEAD(&dataplane->freelist, request, list);
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int xen_block_parse_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    size_t len;
    int i;

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        break;
    case BLKIF_OP_FLUSH_DISKCACHE:
        request->presync = 1;
        if (!request->req.nr_segments) {
            return 0;
        }
        /* fall through */
    case BLKIF_OP_WRITE:
        break;
    case BLKIF_OP_DISCARD:
        return 0;
    default:
        error_report("error: unknown operation (%d)", request->req.operation);
        goto err;
    }

    if (request->req.operation != BLKIF_OP_READ &&
        !blk_is_writable(dataplane->blk)) {
        error_report("error: write req for ro device");
        goto err;
    }

    request->start = request->req.sector_number * dataplane->sector_size;
    for (i = 0; i < request->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            error_report("error: nr_segments too big");
            goto err;
        }
        if (request->req.seg[i].first_sect > request->req.seg[i].last_sect) {
            error_report("error: first > last sector");
            goto err;
        }
        if (request->req.seg[i].last_sect * dataplane->sector_size >=
            XEN_PAGE_SIZE) {
            error_report("error: page crossing");
            goto err;
        }

        len = (request->req.seg[i].last_sect -
               request->req.seg[i].first_sect + 1) * dataplane->sector_size;
        request->size += len;
    }
    if (request->start + request->size > blk_getlength(dataplane->blk)) {
        error_report("error: access beyond end of file");
        goto err;
    }
    return 0;

err:
    request->status = BLKIF_RSP_ERROR;
    return -1;
}

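/*
 * Move the request's data between the guest's granted pages and the local
 * bounce buffer via grant copy: towards the guest for reads, from the
 * guest for writes and flushes that carry segments.
 */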
static int xen_block_copy_request(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    XenDevice *xendev = dataplane->xendev;
    XenDeviceGrantCopySegment segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int i, count;
    bool to_domain = (request->req.operation == BLKIF_OP_READ);
    void *virt = request->buf;
    Error *local_err = NULL;

    if (request->req.nr_segments == 0) {
        return 0;
    }

    count = request->req.nr_segments;

    for (i = 0; i < count; i++) {
        if (to_domain) {
            segs[i].dest.foreign.ref = request->req.seg[i].gref;
            segs[i].dest.foreign.offset = request->req.seg[i].first_sect *
                dataplane->sector_size;
            segs[i].source.virt = virt;
        } else {
            segs[i].source.foreign.ref = request->req.seg[i].gref;
            segs[i].source.foreign.offset = request->req.seg[i].first_sect *
                dataplane->sector_size;
            segs[i].dest.virt = virt;
        }
        segs[i].len = (request->req.seg[i].last_sect -
                       request->req.seg[i].first_sect + 1) *
            dataplane->sector_size;
        virt += segs[i].len;
    }

    xen_device_copy_grant_refs(xendev, to_domain, segs, count, &local_err);

    if (local_err) {
        error_reportf_err(local_err, "failed to copy data: ");

        request->aio_errors++;
        return -1;
    }

    return 0;
}

static int xen_block_do_aio(XenBlockRequest *request);

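/*
 * AIO completion callback. Once the last outstanding AIO for a request
 * has finished, copy read data back to the guest, set the response
 * status, update the accounting and complete the request.
 */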
static void xen_block_complete_aio(void *opaque, int ret)
{
    XenBlockRequest *request = opaque;
    XenBlockDataPlane *dataplane = request->dataplane;

    aio_context_acquire(dataplane->ctx);

    if (ret != 0) {
        error_report("%s I/O error",
                     request->req.operation == BLKIF_OP_READ ?
                     "read" : "write");
        request->aio_errors++;
    }

    request->aio_inflight--;
    if (request->presync) {
        request->presync = 0;
        xen_block_do_aio(request);
        goto done;
    }
    if (request->aio_inflight > 0) {
        goto done;
    }

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        /* in case of failure request->aio_errors is increased */
        if (ret == 0) {
            xen_block_copy_request(request);
        }
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
    default:
        break;
    }

    request->status = request->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;

    switch (request->req.operation) {
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!request->req.nr_segments) {
            break;
        }
        /* fall through */
    case BLKIF_OP_READ:
        if (request->status == BLKIF_RSP_OKAY) {
            block_acct_done(blk_get_stats(dataplane->blk), &request->acct);
        } else {
            block_acct_failed(blk_get_stats(dataplane->blk), &request->acct);
        }
        break;
    case BLKIF_OP_DISCARD:
    default:
        break;
    }

    xen_block_complete_request(request);

    if (dataplane->more_work) {
        qemu_bh_schedule(dataplane->bh);
    }

done:
    aio_context_release(dataplane->ctx);
}

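/*
 * Issue a guest discard as a series of blk_aio_pdiscard() calls, since
 * the requested range may exceed BDRV_REQUEST_MAX_BYTES. Returns false
 * if the sector range would wrap or overflow the byte offset limit.
 */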
static bool xen_block_split_discard(XenBlockRequest *request,
                                    blkif_sector_t sector_number,
                                    uint64_t nr_sectors)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    int64_t byte_offset;
    int byte_chunk;
    uint64_t byte_remaining;
    uint64_t sec_start = sector_number;
    uint64_t sec_count = nr_sectors;

    /* Wrap around, or overflowing byte limit? */
    if (sec_start + sec_count < sec_count ||
        sec_start + sec_count > INT64_MAX / dataplane->sector_size) {
        return false;
    }

    byte_offset = sec_start * dataplane->sector_size;
    byte_remaining = sec_count * dataplane->sector_size;

    do {
        byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ?
            BDRV_REQUEST_MAX_BYTES : byte_remaining;
        request->aio_inflight++;
        blk_aio_pdiscard(dataplane->blk, byte_offset, byte_chunk,
                         xen_block_complete_aio, request);
        byte_remaining -= byte_chunk;
        byte_offset += byte_chunk;
    } while (byte_remaining > 0);

    return true;
}

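/*
 * Submit the AIO for a parsed request: an optional pre-flush, then the
 * read, write or discard itself. The final xen_block_complete_aio()
 * call drops the aio_inflight reference taken up front, completing the
 * request once all AIO it covers has finished.
 */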
static int xen_block_do_aio(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;

    if (request->req.nr_segments &&
        (request->req.operation == BLKIF_OP_WRITE ||
         request->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
        xen_block_copy_request(request)) {
        goto err;
    }

    request->aio_inflight++;
    if (request->presync) {
        blk_aio_flush(request->dataplane->blk, xen_block_complete_aio,
                      request);
        return 0;
    }

    switch (request->req.operation) {
    case BLKIF_OP_READ:
        qemu_iovec_add(&request->v, request->buf, request->size);
        block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
                         request->v.size, BLOCK_ACCT_READ);
        request->aio_inflight++;
        blk_aio_preadv(dataplane->blk, request->start, &request->v, 0,
                       xen_block_complete_aio, request);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_FLUSH_DISKCACHE:
        if (!request->req.nr_segments) {
            break;
        }

        qemu_iovec_add(&request->v, request->buf, request->size);
        block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
                         request->v.size,
                         request->req.operation == BLKIF_OP_WRITE ?
                         BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
        request->aio_inflight++;
        blk_aio_pwritev(dataplane->blk, request->start, &request->v, 0,
                        xen_block_complete_aio, request);
        break;
    case BLKIF_OP_DISCARD:
    {
        struct blkif_request_discard *req = (void *)&request->req;
        if (!xen_block_split_discard(request, req->sector_number,
                                     req->nr_sectors)) {
            goto err;
        }
        break;
    }
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    xen_block_complete_aio(request, 0);

    return 0;

err:
    request->status = BLKIF_RSP_ERROR;
    xen_block_complete_request(request);
    return -1;
}

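/*
 * Place the response on the ring in the layout expected by the
 * negotiated protocol. Returns non-zero if the frontend must be
 * notified, and bumps more_work if further requests are already pending.
 */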
static int xen_block_send_response(XenBlockRequest *request)
{
    XenBlockDataPlane *dataplane = request->dataplane;
    int send_notify = 0;
    int have_requests = 0;
    blkif_response_t *resp;

    /* Place on the response ring for the relevant domain. */
    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        resp = (blkif_response_t *)RING_GET_RESPONSE(
            &dataplane->rings.native,
            dataplane->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        resp = (blkif_response_t *)RING_GET_RESPONSE(
            &dataplane->rings.x86_32_part,
            dataplane->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        resp = (blkif_response_t *)RING_GET_RESPONSE(
            &dataplane->rings.x86_64_part,
            dataplane->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        return 0;
    }

    resp->id = request->req.id;
    resp->operation = request->req.operation;
    resp->status = request->status;

    dataplane->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&dataplane->rings.common,
                                         send_notify);
    if (dataplane->rings.common.rsp_prod_pvt ==
        dataplane->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&dataplane->rings.common,
                                      have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&dataplane->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        dataplane->more_work++;
    }
    return send_notify;
}

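/*
 * Copy a request off the shared ring, converting from the 32-bit or
 * 64-bit x86 layout to the native one where necessary.
 */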
static int xen_block_get_request(XenBlockDataPlane *dataplane,
                                 XenBlockRequest *request, RING_IDX rc)
{
    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE: {
        blkif_request_t *req =
            RING_GET_REQUEST(&dataplane->rings.native, rc);

        memcpy(&request->req, req, sizeof(request->req));
        break;
    }
    case BLKIF_PROTOCOL_X86_32: {
        blkif_x86_32_request_t *req =
            RING_GET_REQUEST(&dataplane->rings.x86_32_part, rc);

        blkif_get_x86_32_req(&request->req, req);
        break;
    }
    case BLKIF_PROTOCOL_X86_64: {
        blkif_x86_64_request_t *req =
            RING_GET_REQUEST(&dataplane->rings.x86_64_part, rc);

        blkif_get_x86_64_req(&request->req, req);
        break;
    }
    }
    /* Prevent the compiler from accessing the on-ring fields instead. */
    barrier();
    return 0;
}

/*
 * Threshold of in-flight requests above which we will start using
 * blk_io_plug()/blk_io_unplug() to batch requests.
 */
#define IO_PLUG_THRESHOLD 1

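/*
 * Consume and submit all pending requests from the shared ring,
 * batching submissions when enough requests are already in flight.
 * Returns true if at least one request was taken off the ring.
 */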
static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
{
    RING_IDX rc, rp;
    XenBlockRequest *request;
    int inflight_atstart = dataplane->requests_inflight;
    int batched = 0;
    bool done_something = false;

    dataplane->more_work = 0;

    rc = dataplane->rings.common.req_cons;
    rp = dataplane->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    /*
     * If there were more than IO_PLUG_THRESHOLD requests in flight
     * when we got here, this is an indication that the bottleneck
     * is below us, so it's worth beginning to batch up I/O requests
     * rather than submitting them immediately. The maximum number
     * of requests we're willing to batch is the number already in
     * flight, so it can grow up to max_requests when the bottleneck
     * is below us.
     */
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        blk_io_plug(dataplane->blk);
    }
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&dataplane->rings.common, rc)) {
            break;
        }
        request = xen_block_start_request(dataplane);
        if (request == NULL) {
            dataplane->more_work++;
            break;
        }
        xen_block_get_request(dataplane, request, rc);
        dataplane->rings.common.req_cons = ++rc;
        done_something = true;

        /* parse them */
        if (xen_block_parse_request(request) != 0) {
            switch (request->req.operation) {
            case BLKIF_OP_READ:
                block_acct_invalid(blk_get_stats(dataplane->blk),
                                   BLOCK_ACCT_READ);
                break;
            case BLKIF_OP_WRITE:
                block_acct_invalid(blk_get_stats(dataplane->blk),
                                   BLOCK_ACCT_WRITE);
                break;
            case BLKIF_OP_FLUSH_DISKCACHE:
                block_acct_invalid(blk_get_stats(dataplane->blk),
                                   BLOCK_ACCT_FLUSH);
                break;
            default:
                break;
            }

            xen_block_complete_request(request);
            continue;
        }

        if (inflight_atstart > IO_PLUG_THRESHOLD &&
            batched >= inflight_atstart) {
            blk_io_unplug(dataplane->blk);
        }
        xen_block_do_aio(request);
        if (inflight_atstart > IO_PLUG_THRESHOLD) {
            if (batched >= inflight_atstart) {
                blk_io_plug(dataplane->blk);
                batched = 0;
            } else {
                batched++;
            }
        }
    }
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        blk_io_unplug(dataplane->blk);
    }

    return done_something;
}

static void xen_block_dataplane_bh(void *opaque)
{
    XenBlockDataPlane *dataplane = opaque;

    aio_context_acquire(dataplane->ctx);
    xen_block_handle_requests(dataplane);
    aio_context_release(dataplane->ctx);
}

static bool xen_block_dataplane_event(void *opaque)
{
    XenBlockDataPlane *dataplane = opaque;

    return xen_block_handle_requests(dataplane);
}

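/*
 * Create the dataplane for a device/backend pair. The bottom half (and,
 * once started, the ring processing) runs in the IOThread's AioContext
 * if one is given, otherwise in the main loop's.
 */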
XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
                                              BlockBackend *blk,
                                              unsigned int sector_size,
                                              IOThread *iothread)
{
    XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1);

    dataplane->xendev = xendev;
    dataplane->blk = blk;
    dataplane->sector_size = sector_size;

    QLIST_INIT(&dataplane->inflight);
    QLIST_INIT(&dataplane->freelist);

    if (iothread) {
        dataplane->iothread = iothread;
        object_ref(OBJECT(dataplane->iothread));
        dataplane->ctx = iothread_get_aio_context(dataplane->iothread);
    } else {
        dataplane->ctx = qemu_get_aio_context();
    }
    dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
                                       dataplane,
                                       &DEVICE(xendev)->mem_reentrancy_guard);

    return dataplane;
}

void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane)
{
    XenBlockRequest *request;

    if (!dataplane) {
        return;
    }

    while (!QLIST_EMPTY(&dataplane->freelist)) {
        request = QLIST_FIRST(&dataplane->freelist);
        QLIST_REMOVE(request, list);
        qemu_iovec_destroy(&request->v);
        qemu_vfree(request->buf);
        g_free(request);
    }

    qemu_bh_delete(dataplane->bh);
    if (dataplane->iothread) {
        object_unref(OBJECT(dataplane->iothread));
    }

    g_free(dataplane);
}

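/*
 * Detach/attach select which AioContext handles the event channel:
 * detaching binds the channel to no context (stopping request
 * processing, e.g. while the backend is drained), attaching re-binds
 * it to the dataplane's context.
 */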
void xen_block_dataplane_detach(XenBlockDataPlane *dataplane)
{
    if (!dataplane || !dataplane->event_channel) {
        return;
    }

    /* Only reason for failure is a NULL channel */
    xen_device_set_event_channel_context(dataplane->xendev,
                                         dataplane->event_channel,
                                         NULL, &error_abort);
}

void xen_block_dataplane_attach(XenBlockDataPlane *dataplane)
{
    if (!dataplane || !dataplane->event_channel) {
        return;
    }

    /* Only reason for failure is a NULL channel */
    xen_device_set_event_channel_context(dataplane->xendev,
                                         dataplane->event_channel,
                                         dataplane->ctx, &error_abort);
}

void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
{
    XenDevice *xendev;

    if (!dataplane) {
        return;
    }

    xendev = dataplane->xendev;

    if (!blk_in_drain(dataplane->blk)) {
        xen_block_dataplane_detach(dataplane);
    }

    aio_context_acquire(dataplane->ctx);
    /* Xen doesn't have multiple users for nodes, so this can't fail */
    blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
    aio_context_release(dataplane->ctx);

    /*
     * Now that the context has been moved onto the main thread, cancel
     * further processing.
     */
    qemu_bh_cancel(dataplane->bh);

    if (dataplane->event_channel) {
        Error *local_err = NULL;

        xen_device_unbind_event_channel(xendev, dataplane->event_channel,
                                        &local_err);
        dataplane->event_channel = NULL;

        if (local_err) {
            error_report_err(local_err);
        }
    }

    if (dataplane->sring) {
        Error *local_err = NULL;

        xen_device_unmap_grant_refs(xendev, dataplane->sring,
                                    dataplane->ring_ref,
                                    dataplane->nr_ring_ref, &local_err);
        dataplane->sring = NULL;

        if (local_err) {
            error_report_err(local_err);
        }
    }

    g_free(dataplane->ring_ref);
    dataplane->ring_ref = NULL;
}

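/*
 * Start the dataplane: map the shared ring, size max_requests for the
 * negotiated protocol, bind the event channel and move the BlockBackend
 * onto the dataplane's AioContext. On failure, any partially
 * initialized state is torn down again via xen_block_dataplane_stop().
 */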
void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
                               const unsigned int ring_ref[],
                               unsigned int nr_ring_ref,
                               unsigned int event_channel,
                               unsigned int protocol,
                               Error **errp)
{
    ERRP_GUARD();
    XenDevice *xendev = dataplane->xendev;
    AioContext *old_context;
    unsigned int ring_size;
    unsigned int i;

    dataplane->nr_ring_ref = nr_ring_ref;
    dataplane->ring_ref = g_new(unsigned int, nr_ring_ref);

    for (i = 0; i < nr_ring_ref; i++) {
        dataplane->ring_ref[i] = ring_ref[i];
    }

    dataplane->protocol = protocol;

    ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        dataplane->max_requests = __CONST_RING_SIZE(blkif, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size);
        break;
    }
    default:
        error_setg(errp, "unknown protocol %u", dataplane->protocol);
        return;
    }

    xen_device_set_max_grant_refs(xendev, dataplane->nr_ring_ref, errp);
    if (*errp) {
        goto stop;
    }

    dataplane->sring = xen_device_map_grant_refs(xendev,
                                                 dataplane->ring_ref,
                                                 dataplane->nr_ring_ref,
                                                 PROT_READ | PROT_WRITE,
                                                 errp);
    if (*errp) {
        goto stop;
    }

    switch (dataplane->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = dataplane->sring;

        BACK_RING_INIT(&dataplane->rings.native, sring_native, ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = dataplane->sring;

        BACK_RING_INIT(&dataplane->rings.x86_32_part, sring_x86_32,
                       ring_size);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = dataplane->sring;

        BACK_RING_INIT(&dataplane->rings.x86_64_part, sring_x86_64,
                       ring_size);
        break;
    }
    }

    dataplane->event_channel =
        xen_device_bind_event_channel(xendev, event_channel,
                                      xen_block_dataplane_event, dataplane,
                                      errp);
    if (*errp) {
        goto stop;
    }

    old_context = blk_get_aio_context(dataplane->blk);
    aio_context_acquire(old_context);
    /* If other users keep the BlockBackend in the iothread, that's ok */
    blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
    aio_context_release(old_context);

    if (!blk_in_drain(dataplane->blk)) {
        xen_block_dataplane_attach(dataplane);
    }

    return;

stop:
    xen_block_dataplane_stop(dataplane);
}