]> git.proxmox.com Git - mirror_qemu.git/blame - hw/block/dataplane/xen-block.c
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
[mirror_qemu.git] / hw / block / dataplane / xen-block.c
CommitLineData
4ea7d1a7 1/*
ca072800
PD
2 * Copyright (c) 2018 Citrix Systems Inc.
3 * (c) Gerd Hoffmann <kraxel@redhat.com>
4ea7d1a7 4 *
ca072800
PD
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; under version 2 of the License.
4ea7d1a7 8 *
ca072800
PD
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
4ea7d1a7 13 *
ca072800
PD
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, see <http://www.gnu.org/licenses/>.
4ea7d1a7 16 *
ca072800
PD
17 * Contributions after 2012-01-13 are licensed under the terms of the
18 * GNU GPL, version 2 or (at your option) any later version.
4ea7d1a7
PD
19 */
20
fcab2b46
PD
21#include "qemu/osdep.h"
22#include "qemu/error-report.h"
db725815 23#include "qemu/main-loop.h"
5df022cf 24#include "qemu/memalign.h"
fcab2b46 25#include "qapi/error.h"
e2abfe5e 26#include "hw/xen/xen.h"
fcab2b46 27#include "hw/block/xen_blkif.h"
e2abfe5e 28#include "hw/xen/interface/io/ring.h"
fcab2b46
PD
29#include "sysemu/block-backend.h"
30#include "sysemu/iothread.h"
31#include "xen-block.h"
32
e7f5b5f8 33typedef struct XenBlockRequest {
fcab2b46
PD
34 blkif_request_t req;
35 int16_t status;
36 off_t start;
37 QEMUIOVector v;
38 void *buf;
39 size_t size;
40 int presync;
41 int aio_inflight;
42 int aio_errors;
f3b604e3 43 XenBlockDataPlane *dataplane;
e7f5b5f8 44 QLIST_ENTRY(XenBlockRequest) list;
fcab2b46 45 BlockAcctCookie acct;
e7f5b5f8 46} XenBlockRequest;
4ea7d1a7 47
f3b604e3 48struct XenBlockDataPlane {
fcab2b46
PD
49 XenDevice *xendev;
50 XenEventChannel *event_channel;
51 unsigned int *ring_ref;
52 unsigned int nr_ring_ref;
53 void *sring;
fcab2b46
PD
54 int protocol;
55 blkif_back_rings_t rings;
56 int more_work;
e7f5b5f8 57 QLIST_HEAD(inflight_head, XenBlockRequest) inflight;
e7f5b5f8 58 QLIST_HEAD(freelist_head, XenBlockRequest) freelist;
fcab2b46
PD
59 int requests_total;
60 int requests_inflight;
fcab2b46
PD
61 unsigned int max_requests;
62 BlockBackend *blk;
5feeb718 63 unsigned int sector_size;
fcab2b46
PD
64 QEMUBH *bh;
65 IOThread *iothread;
66 AioContext *ctx;
4ea7d1a7
PD
67};
68
36d883ba
AP
69static int xen_block_send_response(XenBlockRequest *request);
70
d4683cf9 71static void reset_request(XenBlockRequest *request)
4ea7d1a7 72{
e7f5b5f8
PD
73 memset(&request->req, 0, sizeof(request->req));
74 request->status = 0;
75 request->start = 0;
e7f5b5f8
PD
76 request->size = 0;
77 request->presync = 0;
4ea7d1a7 78
e7f5b5f8
PD
79 request->aio_inflight = 0;
80 request->aio_errors = 0;
4ea7d1a7 81
e7f5b5f8
PD
82 request->dataplane = NULL;
83 memset(&request->list, 0, sizeof(request->list));
84 memset(&request->acct, 0, sizeof(request->acct));
4ea7d1a7 85
e7f5b5f8 86 qemu_iovec_reset(&request->v);
4ea7d1a7
PD
87}
88
d4683cf9 89static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
4ea7d1a7 90{
e7f5b5f8 91 XenBlockRequest *request = NULL;
4ea7d1a7 92
f3b604e3
PD
93 if (QLIST_EMPTY(&dataplane->freelist)) {
94 if (dataplane->requests_total >= dataplane->max_requests) {
4ea7d1a7
PD
95 goto out;
96 }
97 /* allocate new struct */
e7f5b5f8
PD
98 request = g_malloc0(sizeof(*request));
99 request->dataplane = dataplane;
c6025bd1
TS
100 /*
101 * We cannot need more pages per requests than this, and since we
102 * re-use requests, allocate the memory once here. It will be freed
103 * xen_block_dataplane_destroy() when the request list is freed.
104 */
a9ae1418 105 request->buf = qemu_memalign(XEN_PAGE_SIZE,
c6025bd1 106 BLKIF_MAX_SEGMENTS_PER_REQUEST *
a9ae1418 107 XEN_PAGE_SIZE);
f3b604e3 108 dataplane->requests_total++;
e7f5b5f8 109 qemu_iovec_init(&request->v, 1);
4ea7d1a7
PD
110 } else {
111 /* get one from freelist */
e7f5b5f8
PD
112 request = QLIST_FIRST(&dataplane->freelist);
113 QLIST_REMOVE(request, list);
4ea7d1a7 114 }
e7f5b5f8 115 QLIST_INSERT_HEAD(&dataplane->inflight, request, list);
f3b604e3 116 dataplane->requests_inflight++;
4ea7d1a7
PD
117
118out:
e7f5b5f8 119 return request;
4ea7d1a7
PD
120}
121
36d883ba 122static void xen_block_complete_request(XenBlockRequest *request)
4ea7d1a7 123{
e7f5b5f8 124 XenBlockDataPlane *dataplane = request->dataplane;
4ea7d1a7 125
36d883ba
AP
126 if (xen_block_send_response(request)) {
127 Error *local_err = NULL;
4ea7d1a7 128
36d883ba
AP
129 xen_device_notify_event_channel(dataplane->xendev,
130 dataplane->event_channel,
131 &local_err);
132 if (local_err) {
133 error_report_err(local_err);
134 }
135 }
4ea7d1a7 136
e7f5b5f8 137 QLIST_REMOVE(request, list);
36d883ba 138 dataplane->requests_inflight--;
d4683cf9 139 reset_request(request);
e7f5b5f8
PD
140 request->dataplane = dataplane;
141 QLIST_INSERT_HEAD(&dataplane->freelist, request, list);
4ea7d1a7
PD
142}
143
144/*
145 * translate request into iovec + start offset
146 * do sanity checks along the way
147 */
d4683cf9 148static int xen_block_parse_request(XenBlockRequest *request)
4ea7d1a7 149{
e7f5b5f8 150 XenBlockDataPlane *dataplane = request->dataplane;
4ea7d1a7
PD
151 size_t len;
152 int i;
153
e7f5b5f8 154 switch (request->req.operation) {
4ea7d1a7
PD
155 case BLKIF_OP_READ:
156 break;
157 case BLKIF_OP_FLUSH_DISKCACHE:
e7f5b5f8
PD
158 request->presync = 1;
159 if (!request->req.nr_segments) {
4ea7d1a7
PD
160 return 0;
161 }
162 /* fall through */
163 case BLKIF_OP_WRITE:
164 break;
165 case BLKIF_OP_DISCARD:
166 return 0;
167 default:
e7f5b5f8 168 error_report("error: unknown operation (%d)", request->req.operation);
4ea7d1a7
PD
169 goto err;
170 };
171
e7f5b5f8 172 if (request->req.operation != BLKIF_OP_READ &&
86b1cf32 173 !blk_is_writable(dataplane->blk)) {
ca072800 174 error_report("error: write req for ro device");
4ea7d1a7
PD
175 goto err;
176 }
177
5feeb718 178 request->start = request->req.sector_number * dataplane->sector_size;
e7f5b5f8 179 for (i = 0; i < request->req.nr_segments; i++) {
4ea7d1a7 180 if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
ca072800 181 error_report("error: nr_segments too big");
4ea7d1a7
PD
182 goto err;
183 }
e7f5b5f8 184 if (request->req.seg[i].first_sect > request->req.seg[i].last_sect) {
ca072800 185 error_report("error: first > last sector");
4ea7d1a7
PD
186 goto err;
187 }
5feeb718 188 if (request->req.seg[i].last_sect * dataplane->sector_size >=
a9ae1418 189 XEN_PAGE_SIZE) {
ca072800 190 error_report("error: page crossing");
4ea7d1a7
PD
191 goto err;
192 }
193
e7f5b5f8 194 len = (request->req.seg[i].last_sect -
5feeb718 195 request->req.seg[i].first_sect + 1) * dataplane->sector_size;
e7f5b5f8 196 request->size += len;
4ea7d1a7 197 }
3149f183 198 if (request->start + request->size > blk_getlength(dataplane->blk)) {
ca072800 199 error_report("error: access beyond end of file");
4ea7d1a7
PD
200 goto err;
201 }
202 return 0;
203
204err:
e7f5b5f8 205 request->status = BLKIF_RSP_ERROR;
4ea7d1a7
PD
206 return -1;
207}
208
d4683cf9 209static int xen_block_copy_request(XenBlockRequest *request)
4ea7d1a7 210{
e7f5b5f8 211 XenBlockDataPlane *dataplane = request->dataplane;
f3b604e3 212 XenDevice *xendev = dataplane->xendev;
fcab2b46
PD
213 XenDeviceGrantCopySegment segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
214 int i, count;
e7f5b5f8
PD
215 bool to_domain = (request->req.operation == BLKIF_OP_READ);
216 void *virt = request->buf;
fcab2b46 217 Error *local_err = NULL;
4ea7d1a7 218
e7f5b5f8 219 if (request->req.nr_segments == 0) {
4ea7d1a7
PD
220 return 0;
221 }
222
e7f5b5f8 223 count = request->req.nr_segments;
4ea7d1a7
PD
224
225 for (i = 0; i < count; i++) {
226 if (to_domain) {
e7f5b5f8
PD
227 segs[i].dest.foreign.ref = request->req.seg[i].gref;
228 segs[i].dest.foreign.offset = request->req.seg[i].first_sect *
5feeb718 229 dataplane->sector_size;
4ea7d1a7
PD
230 segs[i].source.virt = virt;
231 } else {
e7f5b5f8
PD
232 segs[i].source.foreign.ref = request->req.seg[i].gref;
233 segs[i].source.foreign.offset = request->req.seg[i].first_sect *
5feeb718 234 dataplane->sector_size;
4ea7d1a7
PD
235 segs[i].dest.virt = virt;
236 }
e7f5b5f8 237 segs[i].len = (request->req.seg[i].last_sect -
2bcd05cf 238 request->req.seg[i].first_sect + 1) *
5feeb718 239 dataplane->sector_size;
4ea7d1a7
PD
240 virt += segs[i].len;
241 }
242
fcab2b46
PD
243 xen_device_copy_grant_refs(xendev, to_domain, segs, count, &local_err);
244
245 if (local_err) {
246 error_reportf_err(local_err, "failed to copy data: ");
4ea7d1a7 247
e7f5b5f8 248 request->aio_errors++;
4ea7d1a7
PD
249 return -1;
250 }
251
fcab2b46 252 return 0;
4ea7d1a7
PD
253}
254
d4683cf9 255static int xen_block_do_aio(XenBlockRequest *request);
4ea7d1a7 256
d4683cf9 257static void xen_block_complete_aio(void *opaque, int ret)
4ea7d1a7 258{
e7f5b5f8
PD
259 XenBlockRequest *request = opaque;
260 XenBlockDataPlane *dataplane = request->dataplane;
4ea7d1a7 261
f3b604e3 262 aio_context_acquire(dataplane->ctx);
4ea7d1a7
PD
263
264 if (ret != 0) {
ca072800 265 error_report("%s I/O error",
e7f5b5f8 266 request->req.operation == BLKIF_OP_READ ?
ca072800 267 "read" : "write");
e7f5b5f8 268 request->aio_errors++;
4ea7d1a7
PD
269 }
270
e7f5b5f8
PD
271 request->aio_inflight--;
272 if (request->presync) {
273 request->presync = 0;
d4683cf9 274 xen_block_do_aio(request);
4ea7d1a7
PD
275 goto done;
276 }
e7f5b5f8 277 if (request->aio_inflight > 0) {
4ea7d1a7
PD
278 goto done;
279 }
280
e7f5b5f8 281 switch (request->req.operation) {
4ea7d1a7 282 case BLKIF_OP_READ:
e7f5b5f8 283 /* in case of failure request->aio_errors is increased */
4ea7d1a7 284 if (ret == 0) {
d4683cf9 285 xen_block_copy_request(request);
4ea7d1a7 286 }
4ea7d1a7
PD
287 break;
288 case BLKIF_OP_WRITE:
289 case BLKIF_OP_FLUSH_DISKCACHE:
4ea7d1a7
PD
290 default:
291 break;
292 }
293
e7f5b5f8 294 request->status = request->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
4ea7d1a7 295
e7f5b5f8 296 switch (request->req.operation) {
4ea7d1a7
PD
297 case BLKIF_OP_WRITE:
298 case BLKIF_OP_FLUSH_DISKCACHE:
e7f5b5f8 299 if (!request->req.nr_segments) {
4ea7d1a7
PD
300 break;
301 }
e02d9494 302 /* fall through */
4ea7d1a7 303 case BLKIF_OP_READ:
e7f5b5f8
PD
304 if (request->status == BLKIF_RSP_OKAY) {
305 block_acct_done(blk_get_stats(dataplane->blk), &request->acct);
4ea7d1a7 306 } else {
e7f5b5f8 307 block_acct_failed(blk_get_stats(dataplane->blk), &request->acct);
4ea7d1a7
PD
308 }
309 break;
310 case BLKIF_OP_DISCARD:
311 default:
312 break;
313 }
bfd0d636 314
36d883ba 315 xen_block_complete_request(request);
bfd0d636 316
345f42b4
PD
317 if (dataplane->more_work) {
318 qemu_bh_schedule(dataplane->bh);
319 }
4ea7d1a7
PD
320
321done:
f3b604e3 322 aio_context_release(dataplane->ctx);
4ea7d1a7
PD
323}
324
d4683cf9
PD
325static bool xen_block_split_discard(XenBlockRequest *request,
326 blkif_sector_t sector_number,
327 uint64_t nr_sectors)
4ea7d1a7 328{
e7f5b5f8 329 XenBlockDataPlane *dataplane = request->dataplane;
4ea7d1a7
PD
330 int64_t byte_offset;
331 int byte_chunk;
2bcd05cf 332 uint64_t byte_remaining;
4ea7d1a7
PD
333 uint64_t sec_start = sector_number;
334 uint64_t sec_count = nr_sectors;
335
336 /* Wrap around, or overflowing byte limit? */
337 if (sec_start + sec_count < sec_count ||
5feeb718 338 sec_start + sec_count > INT64_MAX / dataplane->sector_size) {
4ea7d1a7
PD
339 return false;
340 }
341
5feeb718
PD
342 byte_offset = sec_start * dataplane->sector_size;
343 byte_remaining = sec_count * dataplane->sector_size;
4ea7d1a7
PD
344
345 do {
2bcd05cf
PD
346 byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ?
347 BDRV_REQUEST_MAX_BYTES : byte_remaining;
e7f5b5f8 348 request->aio_inflight++;
f3b604e3 349 blk_aio_pdiscard(dataplane->blk, byte_offset, byte_chunk,
d4683cf9 350 xen_block_complete_aio, request);
4ea7d1a7
PD
351 byte_remaining -= byte_chunk;
352 byte_offset += byte_chunk;
353 } while (byte_remaining > 0);
354
355 return true;
356}
357
d4683cf9 358static int xen_block_do_aio(XenBlockRequest *request)
4ea7d1a7 359{
e7f5b5f8
PD
360 XenBlockDataPlane *dataplane = request->dataplane;
361
e7f5b5f8
PD
362 if (request->req.nr_segments &&
363 (request->req.operation == BLKIF_OP_WRITE ||
364 request->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
d4683cf9 365 xen_block_copy_request(request)) {
4ea7d1a7
PD
366 goto err;
367 }
368
e7f5b5f8
PD
369 request->aio_inflight++;
370 if (request->presync) {
d4683cf9
PD
371 blk_aio_flush(request->dataplane->blk, xen_block_complete_aio,
372 request);
4ea7d1a7
PD
373 return 0;
374 }
375
e7f5b5f8 376 switch (request->req.operation) {
4ea7d1a7 377 case BLKIF_OP_READ:
e7f5b5f8
PD
378 qemu_iovec_add(&request->v, request->buf, request->size);
379 block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
380 request->v.size, BLOCK_ACCT_READ);
381 request->aio_inflight++;
382 blk_aio_preadv(dataplane->blk, request->start, &request->v, 0,
d4683cf9 383 xen_block_complete_aio, request);
4ea7d1a7
PD
384 break;
385 case BLKIF_OP_WRITE:
386 case BLKIF_OP_FLUSH_DISKCACHE:
e7f5b5f8 387 if (!request->req.nr_segments) {
4ea7d1a7
PD
388 break;
389 }
390
e7f5b5f8
PD
391 qemu_iovec_add(&request->v, request->buf, request->size);
392 block_acct_start(blk_get_stats(dataplane->blk), &request->acct,
393 request->v.size,
394 request->req.operation == BLKIF_OP_WRITE ?
4ea7d1a7 395 BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH);
e7f5b5f8
PD
396 request->aio_inflight++;
397 blk_aio_pwritev(dataplane->blk, request->start, &request->v, 0,
d4683cf9 398 xen_block_complete_aio, request);
4ea7d1a7
PD
399 break;
400 case BLKIF_OP_DISCARD:
401 {
e7f5b5f8 402 struct blkif_request_discard *req = (void *)&request->req;
d4683cf9
PD
403 if (!xen_block_split_discard(request, req->sector_number,
404 req->nr_sectors)) {
4ea7d1a7
PD
405 goto err;
406 }
407 break;
408 }
409 default:
410 /* unknown operation (shouldn't happen -- parse catches this) */
411 goto err;
412 }
413
d4683cf9 414 xen_block_complete_aio(request, 0);
4ea7d1a7
PD
415
416 return 0;
417
418err:
e7f5b5f8 419 request->status = BLKIF_RSP_ERROR;
36d883ba 420 xen_block_complete_request(request);
4ea7d1a7
PD
421 return -1;
422}
423
bfd0d636 424static int xen_block_send_response(XenBlockRequest *request)
4ea7d1a7 425{
e7f5b5f8 426 XenBlockDataPlane *dataplane = request->dataplane;
fcab2b46
PD
427 int send_notify = 0;
428 int have_requests = 0;
429 blkif_response_t *resp;
4ea7d1a7
PD
430
431 /* Place on the response ring for the relevant domain. */
f3b604e3 432 switch (dataplane->protocol) {
4ea7d1a7
PD
433 case BLKIF_PROTOCOL_NATIVE:
434 resp = (blkif_response_t *)RING_GET_RESPONSE(
f3b604e3
PD
435 &dataplane->rings.native,
436 dataplane->rings.native.rsp_prod_pvt);
4ea7d1a7
PD
437 break;
438 case BLKIF_PROTOCOL_X86_32:
439 resp = (blkif_response_t *)RING_GET_RESPONSE(
f3b604e3
PD
440 &dataplane->rings.x86_32_part,
441 dataplane->rings.x86_32_part.rsp_prod_pvt);
4ea7d1a7
PD
442 break;
443 case BLKIF_PROTOCOL_X86_64:
444 resp = (blkif_response_t *)RING_GET_RESPONSE(
f3b604e3
PD
445 &dataplane->rings.x86_64_part,
446 dataplane->rings.x86_64_part.rsp_prod_pvt);
4ea7d1a7
PD
447 break;
448 default:
449 return 0;
450 }
451
e7f5b5f8
PD
452 resp->id = request->req.id;
453 resp->operation = request->req.operation;
454 resp->status = request->status;
4ea7d1a7 455
f3b604e3 456 dataplane->rings.common.rsp_prod_pvt++;
4ea7d1a7 457
f3b604e3
PD
458 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&dataplane->rings.common,
459 send_notify);
460 if (dataplane->rings.common.rsp_prod_pvt ==
461 dataplane->rings.common.req_cons) {
4ea7d1a7
PD
462 /*
463 * Tail check for pending requests. Allows frontend to avoid
464 * notifications if requests are already in flight (lower
465 * overheads and promotes batching).
466 */
f3b604e3
PD
467 RING_FINAL_CHECK_FOR_REQUESTS(&dataplane->rings.common,
468 have_requests);
469 } else if (RING_HAS_UNCONSUMED_REQUESTS(&dataplane->rings.common)) {
4ea7d1a7
PD
470 have_requests = 1;
471 }
472
473 if (have_requests) {
f3b604e3 474 dataplane->more_work++;
4ea7d1a7
PD
475 }
476 return send_notify;
477}
478
d4683cf9
PD
479static int xen_block_get_request(XenBlockDataPlane *dataplane,
480 XenBlockRequest *request, RING_IDX rc)
4ea7d1a7 481{
f3b604e3
PD
482 switch (dataplane->protocol) {
483 case BLKIF_PROTOCOL_NATIVE: {
484 blkif_request_t *req =
485 RING_GET_REQUEST(&dataplane->rings.native, rc);
486
e7f5b5f8 487 memcpy(&request->req, req, sizeof(request->req));
4ea7d1a7 488 break;
f3b604e3
PD
489 }
490 case BLKIF_PROTOCOL_X86_32: {
491 blkif_x86_32_request_t *req =
492 RING_GET_REQUEST(&dataplane->rings.x86_32_part, rc);
493
e7f5b5f8 494 blkif_get_x86_32_req(&request->req, req);
4ea7d1a7 495 break;
f3b604e3
PD
496 }
497 case BLKIF_PROTOCOL_X86_64: {
498 blkif_x86_64_request_t *req =
499 RING_GET_REQUEST(&dataplane->rings.x86_64_part, rc);
500
e7f5b5f8 501 blkif_get_x86_64_req(&request->req, req);
4ea7d1a7
PD
502 break;
503 }
f3b604e3 504 }
4ea7d1a7
PD
505 /* Prevent the compiler from accessing the on-ring fields instead. */
506 barrier();
507 return 0;
508}
509
6de45f91
TS
510/*
511 * Threshold of in-flight requests above which we will start using
512 * blk_io_plug()/blk_io_unplug() to batch requests.
513 */
514#define IO_PLUG_THRESHOLD 1
515
345f42b4 516static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
4ea7d1a7
PD
517{
518 RING_IDX rc, rp;
e7f5b5f8 519 XenBlockRequest *request;
6de45f91
TS
520 int inflight_atstart = dataplane->requests_inflight;
521 int batched = 0;
345f42b4 522 bool done_something = false;
4ea7d1a7 523
f3b604e3 524 dataplane->more_work = 0;
4ea7d1a7 525
f3b604e3
PD
526 rc = dataplane->rings.common.req_cons;
527 rp = dataplane->rings.common.sring->req_prod;
4ea7d1a7
PD
528 xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
529
6de45f91
TS
530 /*
531 * If there was more than IO_PLUG_THRESHOLD requests in flight
532 * when we got here, this is an indication that there the bottleneck
533 * is below us, so it's worth beginning to batch up I/O requests
534 * rather than submitting them immediately. The maximum number
535 * of requests we're willing to batch is the number already in
536 * flight, so it can grow up to max_requests when the bottleneck
537 * is below us.
538 */
539 if (inflight_atstart > IO_PLUG_THRESHOLD) {
540 blk_io_plug(dataplane->blk);
541 }
4ea7d1a7
PD
542 while (rc != rp) {
543 /* pull request from ring */
f3b604e3 544 if (RING_REQUEST_CONS_OVERFLOW(&dataplane->rings.common, rc)) {
4ea7d1a7
PD
545 break;
546 }
d4683cf9 547 request = xen_block_start_request(dataplane);
e7f5b5f8 548 if (request == NULL) {
f3b604e3 549 dataplane->more_work++;
4ea7d1a7
PD
550 break;
551 }
d4683cf9 552 xen_block_get_request(dataplane, request, rc);
f3b604e3 553 dataplane->rings.common.req_cons = ++rc;
345f42b4 554 done_something = true;
4ea7d1a7
PD
555
556 /* parse them */
d4683cf9 557 if (xen_block_parse_request(request) != 0) {
e7f5b5f8 558 switch (request->req.operation) {
4ea7d1a7 559 case BLKIF_OP_READ:
f3b604e3 560 block_acct_invalid(blk_get_stats(dataplane->blk),
4ea7d1a7
PD
561 BLOCK_ACCT_READ);
562 break;
563 case BLKIF_OP_WRITE:
f3b604e3 564 block_acct_invalid(blk_get_stats(dataplane->blk),
4ea7d1a7
PD
565 BLOCK_ACCT_WRITE);
566 break;
567 case BLKIF_OP_FLUSH_DISKCACHE:
f3b604e3 568 block_acct_invalid(blk_get_stats(dataplane->blk),
4ea7d1a7
PD
569 BLOCK_ACCT_FLUSH);
570 default:
571 break;
572 };
573
36d883ba 574 xen_block_complete_request(request);
4ea7d1a7
PD
575 continue;
576 }
577
6de45f91
TS
578 if (inflight_atstart > IO_PLUG_THRESHOLD &&
579 batched >= inflight_atstart) {
580 blk_io_unplug(dataplane->blk);
581 }
d4683cf9 582 xen_block_do_aio(request);
6de45f91
TS
583 if (inflight_atstart > IO_PLUG_THRESHOLD) {
584 if (batched >= inflight_atstart) {
585 blk_io_plug(dataplane->blk);
586 batched = 0;
587 } else {
588 batched++;
589 }
590 }
591 }
592 if (inflight_atstart > IO_PLUG_THRESHOLD) {
593 blk_io_unplug(dataplane->blk);
4ea7d1a7
PD
594 }
595
345f42b4 596 return done_something;
4ea7d1a7
PD
597}
598
d4683cf9 599static void xen_block_dataplane_bh(void *opaque)
4ea7d1a7 600{
f3b604e3 601 XenBlockDataPlane *dataplane = opaque;
4ea7d1a7 602
f3b604e3 603 aio_context_acquire(dataplane->ctx);
d4683cf9 604 xen_block_handle_requests(dataplane);
f3b604e3 605 aio_context_release(dataplane->ctx);
4ea7d1a7
PD
606}
607
345f42b4 608static bool xen_block_dataplane_event(void *opaque)
fcab2b46 609{
f3b604e3 610 XenBlockDataPlane *dataplane = opaque;
fcab2b46 611
345f42b4 612 return xen_block_handle_requests(dataplane);
fcab2b46
PD
613}
614
f3b604e3 615XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
5feeb718
PD
616 BlockBackend *blk,
617 unsigned int sector_size,
f3b604e3 618 IOThread *iothread)
4ea7d1a7 619{
f3b604e3 620 XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1);
4ea7d1a7 621
f3b604e3 622 dataplane->xendev = xendev;
5feeb718
PD
623 dataplane->blk = blk;
624 dataplane->sector_size = sector_size;
4ea7d1a7 625
f3b604e3 626 QLIST_INIT(&dataplane->inflight);
f3b604e3 627 QLIST_INIT(&dataplane->freelist);
4ea7d1a7 628
fcab2b46 629 if (iothread) {
f3b604e3
PD
630 dataplane->iothread = iothread;
631 object_ref(OBJECT(dataplane->iothread));
632 dataplane->ctx = iothread_get_aio_context(dataplane->iothread);
fcab2b46 633 } else {
f3b604e3 634 dataplane->ctx = qemu_get_aio_context();
fcab2b46 635 }
f63192b0
AB
636 dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh,
637 dataplane,
638 &DEVICE(xendev)->mem_reentrancy_guard);
fcab2b46 639
f3b604e3 640 return dataplane;
4ea7d1a7
PD
641}
642
f3b604e3 643void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane)
4ea7d1a7 644{
e7f5b5f8 645 XenBlockRequest *request;
4ea7d1a7 646
f3b604e3 647 if (!dataplane) {
fcab2b46
PD
648 return;
649 }
4ea7d1a7 650
f3b604e3 651 while (!QLIST_EMPTY(&dataplane->freelist)) {
e7f5b5f8
PD
652 request = QLIST_FIRST(&dataplane->freelist);
653 QLIST_REMOVE(request, list);
654 qemu_iovec_destroy(&request->v);
c6025bd1 655 qemu_vfree(request->buf);
e7f5b5f8 656 g_free(request);
4ea7d1a7
PD
657 }
658
f3b604e3
PD
659 qemu_bh_delete(dataplane->bh);
660 if (dataplane->iothread) {
661 object_unref(OBJECT(dataplane->iothread));
fcab2b46
PD
662 }
663
f3b604e3 664 g_free(dataplane);
4ea7d1a7
PD
665}
666
f6eac904
SH
667void xen_block_dataplane_detach(XenBlockDataPlane *dataplane)
668{
669 if (!dataplane || !dataplane->event_channel) {
670 return;
671 }
672
673 /* Only reason for failure is a NULL channel */
674 xen_device_set_event_channel_context(dataplane->xendev,
675 dataplane->event_channel,
676 NULL, &error_abort);
677}
678
679void xen_block_dataplane_attach(XenBlockDataPlane *dataplane)
680{
681 if (!dataplane || !dataplane->event_channel) {
682 return;
683 }
684
685 /* Only reason for failure is a NULL channel */
686 xen_device_set_event_channel_context(dataplane->xendev,
687 dataplane->event_channel,
688 dataplane->ctx, &error_abort);
689}
690
f3b604e3 691void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
4ea7d1a7 692{
fcab2b46 693 XenDevice *xendev;
4ea7d1a7 694
f3b604e3 695 if (!dataplane) {
fcab2b46
PD
696 return;
697 }
698
32d0b7be
PD
699 xendev = dataplane->xendev;
700
f6eac904
SH
701 if (!blk_in_drain(dataplane->blk)) {
702 xen_block_dataplane_detach(dataplane);
32d0b7be 703 }
f6eac904
SH
704
705 aio_context_acquire(dataplane->ctx);
97896a48
KW
706 /* Xen doesn't have multiple users for nodes, so this can't fail */
707 blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
f3b604e3 708 aio_context_release(dataplane->ctx);
fcab2b46 709
32d0b7be
PD
710 /*
711 * Now that the context has been moved onto the main thread, cancel
712 * further processing.
713 */
714 qemu_bh_cancel(dataplane->bh);
fcab2b46 715
f3b604e3 716 if (dataplane->event_channel) {
fcab2b46
PD
717 Error *local_err = NULL;
718
f3b604e3 719 xen_device_unbind_event_channel(xendev, dataplane->event_channel,
fcab2b46 720 &local_err);
f3b604e3 721 dataplane->event_channel = NULL;
fcab2b46
PD
722
723 if (local_err) {
724 error_report_err(local_err);
725 }
726 }
727
f3b604e3 728 if (dataplane->sring) {
fcab2b46
PD
729 Error *local_err = NULL;
730
f3b604e3 731 xen_device_unmap_grant_refs(xendev, dataplane->sring,
f80fad16 732 dataplane->ring_ref,
f3b604e3
PD
733 dataplane->nr_ring_ref, &local_err);
734 dataplane->sring = NULL;
fcab2b46
PD
735
736 if (local_err) {
737 error_report_err(local_err);
738 }
739 }
740
f3b604e3
PD
741 g_free(dataplane->ring_ref);
742 dataplane->ring_ref = NULL;
fcab2b46
PD
743}
744
f3b604e3 745void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
fcab2b46
PD
746 const unsigned int ring_ref[],
747 unsigned int nr_ring_ref,
748 unsigned int event_channel,
749 unsigned int protocol,
750 Error **errp)
751{
1de7096d 752 ERRP_GUARD();
f3b604e3 753 XenDevice *xendev = dataplane->xendev;
c7040ff6 754 AioContext *old_context;
fcab2b46
PD
755 unsigned int ring_size;
756 unsigned int i;
757
f3b604e3
PD
758 dataplane->nr_ring_ref = nr_ring_ref;
759 dataplane->ring_ref = g_new(unsigned int, nr_ring_ref);
fcab2b46
PD
760
761 for (i = 0; i < nr_ring_ref; i++) {
f3b604e3 762 dataplane->ring_ref[i] = ring_ref[i];
fcab2b46
PD
763 }
764
f3b604e3 765 dataplane->protocol = protocol;
fcab2b46 766
a9ae1418 767 ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
f3b604e3 768 switch (dataplane->protocol) {
fcab2b46
PD
769 case BLKIF_PROTOCOL_NATIVE:
770 {
f3b604e3 771 dataplane->max_requests = __CONST_RING_SIZE(blkif, ring_size);
fcab2b46
PD
772 break;
773 }
774 case BLKIF_PROTOCOL_X86_32:
775 {
f3b604e3 776 dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size);
fcab2b46
PD
777 break;
778 }
779 case BLKIF_PROTOCOL_X86_64:
780 {
f3b604e3 781 dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size);
fcab2b46
PD
782 break;
783 }
784 default:
f3b604e3 785 error_setg(errp, "unknown protocol %u", dataplane->protocol);
fcab2b46
PD
786 return;
787 }
788
f3b604e3 789 xen_device_set_max_grant_refs(xendev, dataplane->nr_ring_ref,
1de7096d
VSO
790 errp);
791 if (*errp) {
fcab2b46
PD
792 goto stop;
793 }
794
f3b604e3
PD
795 dataplane->sring = xen_device_map_grant_refs(xendev,
796 dataplane->ring_ref,
797 dataplane->nr_ring_ref,
fcab2b46 798 PROT_READ | PROT_WRITE,
1de7096d
VSO
799 errp);
800 if (*errp) {
fcab2b46
PD
801 goto stop;
802 }
803
f3b604e3 804 switch (dataplane->protocol) {
fcab2b46
PD
805 case BLKIF_PROTOCOL_NATIVE:
806 {
f3b604e3 807 blkif_sring_t *sring_native = dataplane->sring;
fcab2b46 808
f3b604e3 809 BACK_RING_INIT(&dataplane->rings.native, sring_native, ring_size);
fcab2b46
PD
810 break;
811 }
812 case BLKIF_PROTOCOL_X86_32:
813 {
f3b604e3 814 blkif_x86_32_sring_t *sring_x86_32 = dataplane->sring;
fcab2b46 815
f3b604e3 816 BACK_RING_INIT(&dataplane->rings.x86_32_part, sring_x86_32,
fcab2b46
PD
817 ring_size);
818 break;
819 }
820 case BLKIF_PROTOCOL_X86_64:
821 {
f3b604e3 822 blkif_x86_64_sring_t *sring_x86_64 = dataplane->sring;
fcab2b46 823
f3b604e3 824 BACK_RING_INIT(&dataplane->rings.x86_64_part, sring_x86_64,
fcab2b46
PD
825 ring_size);
826 break;
827 }
828 }
829
f3b604e3 830 dataplane->event_channel =
32d0b7be 831 xen_device_bind_event_channel(xendev, event_channel,
d4683cf9 832 xen_block_dataplane_event, dataplane,
1de7096d
VSO
833 errp);
834 if (*errp) {
fcab2b46
PD
835 goto stop;
836 }
837
c7040ff6
SL
838 old_context = blk_get_aio_context(dataplane->blk);
839 aio_context_acquire(old_context);
97896a48
KW
840 /* If other users keep the BlockBackend in the iothread, that's ok */
841 blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
c7040ff6
SL
842 aio_context_release(old_context);
843
f6eac904
SH
844 if (!blk_in_drain(dataplane->blk)) {
845 xen_block_dataplane_attach(dataplane);
846 }
32d0b7be 847
fcab2b46
PD
848 return;
849
850stop:
f3b604e3 851 xen_block_dataplane_stop(dataplane);
4ea7d1a7 852}