]>
Commit | Line | Data |
---|---|---|
4ea7d1a7 | 1 | /* |
ca072800 PD |
2 | * Copyright (c) 2018 Citrix Systems Inc. |
3 | * (c) Gerd Hoffmann <kraxel@redhat.com> | |
4ea7d1a7 | 4 | * |
ca072800 PD |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | |
7 | * the Free Software Foundation; under version 2 of the License. | |
4ea7d1a7 | 8 | * |
ca072800 PD |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
4ea7d1a7 | 13 | * |
ca072800 PD |
14 | * You should have received a copy of the GNU General Public License along |
15 | * with this program; if not, see <http://www.gnu.org/licenses/>. | |
4ea7d1a7 | 16 | * |
ca072800 PD |
17 | * Contributions after 2012-01-13 are licensed under the terms of the |
18 | * GNU GPL, version 2 or (at your option) any later version. | |
4ea7d1a7 PD |
19 | */ |
20 | ||
fcab2b46 PD |
21 | #include "qemu/osdep.h" |
22 | #include "qemu/error-report.h" | |
db725815 | 23 | #include "qemu/main-loop.h" |
fcab2b46 | 24 | #include "qapi/error.h" |
fcab2b46 PD |
25 | #include "hw/xen/xen_common.h" |
26 | #include "hw/block/xen_blkif.h" | |
27 | #include "sysemu/block-backend.h" | |
28 | #include "sysemu/iothread.h" | |
29 | #include "xen-block.h" | |
30 | ||
e7f5b5f8 | 31 | typedef struct XenBlockRequest { |
fcab2b46 PD |
32 | blkif_request_t req; |
33 | int16_t status; | |
34 | off_t start; | |
35 | QEMUIOVector v; | |
36 | void *buf; | |
37 | size_t size; | |
38 | int presync; | |
39 | int aio_inflight; | |
40 | int aio_errors; | |
f3b604e3 | 41 | XenBlockDataPlane *dataplane; |
e7f5b5f8 | 42 | QLIST_ENTRY(XenBlockRequest) list; |
fcab2b46 | 43 | BlockAcctCookie acct; |
e7f5b5f8 | 44 | } XenBlockRequest; |
4ea7d1a7 | 45 | |
f3b604e3 | 46 | struct XenBlockDataPlane { |
fcab2b46 PD |
47 | XenDevice *xendev; |
48 | XenEventChannel *event_channel; | |
49 | unsigned int *ring_ref; | |
50 | unsigned int nr_ring_ref; | |
51 | void *sring; | |
fcab2b46 PD |
52 | int protocol; |
53 | blkif_back_rings_t rings; | |
54 | int more_work; | |
e7f5b5f8 | 55 | QLIST_HEAD(inflight_head, XenBlockRequest) inflight; |
e7f5b5f8 | 56 | QLIST_HEAD(freelist_head, XenBlockRequest) freelist; |
fcab2b46 PD |
57 | int requests_total; |
58 | int requests_inflight; | |
fcab2b46 PD |
59 | unsigned int max_requests; |
60 | BlockBackend *blk; | |
5feeb718 | 61 | unsigned int sector_size; |
fcab2b46 PD |
62 | QEMUBH *bh; |
63 | IOThread *iothread; | |
64 | AioContext *ctx; | |
4ea7d1a7 PD |
65 | }; |
66 | ||
36d883ba AP |
67 | static int xen_block_send_response(XenBlockRequest *request); |
68 | ||
d4683cf9 | 69 | static void reset_request(XenBlockRequest *request) |
4ea7d1a7 | 70 | { |
e7f5b5f8 PD |
71 | memset(&request->req, 0, sizeof(request->req)); |
72 | request->status = 0; | |
73 | request->start = 0; | |
e7f5b5f8 PD |
74 | request->size = 0; |
75 | request->presync = 0; | |
4ea7d1a7 | 76 | |
e7f5b5f8 PD |
77 | request->aio_inflight = 0; |
78 | request->aio_errors = 0; | |
4ea7d1a7 | 79 | |
e7f5b5f8 PD |
80 | request->dataplane = NULL; |
81 | memset(&request->list, 0, sizeof(request->list)); | |
82 | memset(&request->acct, 0, sizeof(request->acct)); | |
4ea7d1a7 | 83 | |
e7f5b5f8 | 84 | qemu_iovec_reset(&request->v); |
4ea7d1a7 PD |
85 | } |
86 | ||
d4683cf9 | 87 | static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane) |
4ea7d1a7 | 88 | { |
e7f5b5f8 | 89 | XenBlockRequest *request = NULL; |
4ea7d1a7 | 90 | |
f3b604e3 PD |
91 | if (QLIST_EMPTY(&dataplane->freelist)) { |
92 | if (dataplane->requests_total >= dataplane->max_requests) { | |
4ea7d1a7 PD |
93 | goto out; |
94 | } | |
95 | /* allocate new struct */ | |
e7f5b5f8 PD |
96 | request = g_malloc0(sizeof(*request)); |
97 | request->dataplane = dataplane; | |
c6025bd1 TS |
98 | /* |
99 | * We cannot need more pages per requests than this, and since we | |
100 | * re-use requests, allocate the memory once here. It will be freed | |
101 | * xen_block_dataplane_destroy() when the request list is freed. | |
102 | */ | |
103 | request->buf = qemu_memalign(XC_PAGE_SIZE, | |
104 | BLKIF_MAX_SEGMENTS_PER_REQUEST * | |
105 | XC_PAGE_SIZE); | |
f3b604e3 | 106 | dataplane->requests_total++; |
e7f5b5f8 | 107 | qemu_iovec_init(&request->v, 1); |
4ea7d1a7 PD |
108 | } else { |
109 | /* get one from freelist */ | |
e7f5b5f8 PD |
110 | request = QLIST_FIRST(&dataplane->freelist); |
111 | QLIST_REMOVE(request, list); | |
4ea7d1a7 | 112 | } |
e7f5b5f8 | 113 | QLIST_INSERT_HEAD(&dataplane->inflight, request, list); |
f3b604e3 | 114 | dataplane->requests_inflight++; |
4ea7d1a7 PD |
115 | |
116 | out: | |
e7f5b5f8 | 117 | return request; |
4ea7d1a7 PD |
118 | } |
119 | ||
36d883ba | 120 | static void xen_block_complete_request(XenBlockRequest *request) |
4ea7d1a7 | 121 | { |
e7f5b5f8 | 122 | XenBlockDataPlane *dataplane = request->dataplane; |
4ea7d1a7 | 123 | |
36d883ba AP |
124 | if (xen_block_send_response(request)) { |
125 | Error *local_err = NULL; | |
4ea7d1a7 | 126 | |
36d883ba AP |
127 | xen_device_notify_event_channel(dataplane->xendev, |
128 | dataplane->event_channel, | |
129 | &local_err); | |
130 | if (local_err) { | |
131 | error_report_err(local_err); | |
132 | } | |
133 | } | |
4ea7d1a7 | 134 | |
e7f5b5f8 | 135 | QLIST_REMOVE(request, list); |
36d883ba | 136 | dataplane->requests_inflight--; |
d4683cf9 | 137 | reset_request(request); |
e7f5b5f8 PD |
138 | request->dataplane = dataplane; |
139 | QLIST_INSERT_HEAD(&dataplane->freelist, request, list); | |
4ea7d1a7 PD |
140 | } |
141 | ||
142 | /* | |
143 | * translate request into iovec + start offset | |
144 | * do sanity checks along the way | |
145 | */ | |
d4683cf9 | 146 | static int xen_block_parse_request(XenBlockRequest *request) |
4ea7d1a7 | 147 | { |
e7f5b5f8 | 148 | XenBlockDataPlane *dataplane = request->dataplane; |
4ea7d1a7 PD |
149 | size_t len; |
150 | int i; | |
151 | ||
e7f5b5f8 | 152 | switch (request->req.operation) { |
4ea7d1a7 PD |
153 | case BLKIF_OP_READ: |
154 | break; | |
155 | case BLKIF_OP_FLUSH_DISKCACHE: | |
e7f5b5f8 PD |
156 | request->presync = 1; |
157 | if (!request->req.nr_segments) { | |
4ea7d1a7 PD |
158 | return 0; |
159 | } | |
160 | /* fall through */ | |
161 | case BLKIF_OP_WRITE: | |
162 | break; | |
163 | case BLKIF_OP_DISCARD: | |
164 | return 0; | |
165 | default: | |
e7f5b5f8 | 166 | error_report("error: unknown operation (%d)", request->req.operation); |
4ea7d1a7 PD |
167 | goto err; |
168 | }; | |
169 | ||
e7f5b5f8 | 170 | if (request->req.operation != BLKIF_OP_READ && |
f3b604e3 | 171 | blk_is_read_only(dataplane->blk)) { |
ca072800 | 172 | error_report("error: write req for ro device"); |
4ea7d1a7 PD |
173 | goto err; |
174 | } | |
175 | ||
5feeb718 | 176 | request->start = request->req.sector_number * dataplane->sector_size; |
e7f5b5f8 | 177 | for (i = 0; i < request->req.nr_segments; i++) { |
4ea7d1a7 | 178 | if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { |
ca072800 | 179 | error_report("error: nr_segments too big"); |
4ea7d1a7 PD |
180 | goto err; |
181 | } | |
e7f5b5f8 | 182 | if (request->req.seg[i].first_sect > request->req.seg[i].last_sect) { |
ca072800 | 183 | error_report("error: first > last sector"); |
4ea7d1a7 PD |
184 | goto err; |
185 | } | |
5feeb718 | 186 | if (request->req.seg[i].last_sect * dataplane->sector_size >= |
e7f5b5f8 | 187 | XC_PAGE_SIZE) { |
ca072800 | 188 | error_report("error: page crossing"); |
4ea7d1a7 PD |
189 | goto err; |
190 | } | |
191 | ||
e7f5b5f8 | 192 | len = (request->req.seg[i].last_sect - |
5feeb718 | 193 | request->req.seg[i].first_sect + 1) * dataplane->sector_size; |
e7f5b5f8 | 194 | request->size += len; |
4ea7d1a7 | 195 | } |
3149f183 | 196 | if (request->start + request->size > blk_getlength(dataplane->blk)) { |
ca072800 | 197 | error_report("error: access beyond end of file"); |
4ea7d1a7 PD |
198 | goto err; |
199 | } | |
200 | return 0; | |
201 | ||
202 | err: | |
e7f5b5f8 | 203 | request->status = BLKIF_RSP_ERROR; |
4ea7d1a7 PD |
204 | return -1; |
205 | } | |
206 | ||
d4683cf9 | 207 | static int xen_block_copy_request(XenBlockRequest *request) |
4ea7d1a7 | 208 | { |
e7f5b5f8 | 209 | XenBlockDataPlane *dataplane = request->dataplane; |
f3b604e3 | 210 | XenDevice *xendev = dataplane->xendev; |
fcab2b46 PD |
211 | XenDeviceGrantCopySegment segs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
212 | int i, count; | |
e7f5b5f8 PD |
213 | bool to_domain = (request->req.operation == BLKIF_OP_READ); |
214 | void *virt = request->buf; | |
fcab2b46 | 215 | Error *local_err = NULL; |
4ea7d1a7 | 216 | |
e7f5b5f8 | 217 | if (request->req.nr_segments == 0) { |
4ea7d1a7 PD |
218 | return 0; |
219 | } | |
220 | ||
e7f5b5f8 | 221 | count = request->req.nr_segments; |
4ea7d1a7 PD |
222 | |
223 | for (i = 0; i < count; i++) { | |
224 | if (to_domain) { | |
e7f5b5f8 PD |
225 | segs[i].dest.foreign.ref = request->req.seg[i].gref; |
226 | segs[i].dest.foreign.offset = request->req.seg[i].first_sect * | |
5feeb718 | 227 | dataplane->sector_size; |
4ea7d1a7 PD |
228 | segs[i].source.virt = virt; |
229 | } else { | |
e7f5b5f8 PD |
230 | segs[i].source.foreign.ref = request->req.seg[i].gref; |
231 | segs[i].source.foreign.offset = request->req.seg[i].first_sect * | |
5feeb718 | 232 | dataplane->sector_size; |
4ea7d1a7 PD |
233 | segs[i].dest.virt = virt; |
234 | } | |
e7f5b5f8 | 235 | segs[i].len = (request->req.seg[i].last_sect - |
2bcd05cf | 236 | request->req.seg[i].first_sect + 1) * |
5feeb718 | 237 | dataplane->sector_size; |
4ea7d1a7 PD |
238 | virt += segs[i].len; |
239 | } | |
240 | ||
fcab2b46 PD |
241 | xen_device_copy_grant_refs(xendev, to_domain, segs, count, &local_err); |
242 | ||
243 | if (local_err) { | |
244 | error_reportf_err(local_err, "failed to copy data: "); | |
4ea7d1a7 | 245 | |
e7f5b5f8 | 246 | request->aio_errors++; |
4ea7d1a7 PD |
247 | return -1; |
248 | } | |
249 | ||
fcab2b46 | 250 | return 0; |
4ea7d1a7 PD |
251 | } |
252 | ||
d4683cf9 | 253 | static int xen_block_do_aio(XenBlockRequest *request); |
4ea7d1a7 | 254 | |
d4683cf9 | 255 | static void xen_block_complete_aio(void *opaque, int ret) |
4ea7d1a7 | 256 | { |
e7f5b5f8 PD |
257 | XenBlockRequest *request = opaque; |
258 | XenBlockDataPlane *dataplane = request->dataplane; | |
4ea7d1a7 | 259 | |
f3b604e3 | 260 | aio_context_acquire(dataplane->ctx); |
4ea7d1a7 PD |
261 | |
262 | if (ret != 0) { | |
ca072800 | 263 | error_report("%s I/O error", |
e7f5b5f8 | 264 | request->req.operation == BLKIF_OP_READ ? |
ca072800 | 265 | "read" : "write"); |
e7f5b5f8 | 266 | request->aio_errors++; |
4ea7d1a7 PD |
267 | } |
268 | ||
e7f5b5f8 PD |
269 | request->aio_inflight--; |
270 | if (request->presync) { | |
271 | request->presync = 0; | |
d4683cf9 | 272 | xen_block_do_aio(request); |
4ea7d1a7 PD |
273 | goto done; |
274 | } | |
e7f5b5f8 | 275 | if (request->aio_inflight > 0) { |
4ea7d1a7 PD |
276 | goto done; |
277 | } | |
278 | ||
e7f5b5f8 | 279 | switch (request->req.operation) { |
4ea7d1a7 | 280 | case BLKIF_OP_READ: |
e7f5b5f8 | 281 | /* in case of failure request->aio_errors is increased */ |
4ea7d1a7 | 282 | if (ret == 0) { |
d4683cf9 | 283 | xen_block_copy_request(request); |
4ea7d1a7 | 284 | } |
4ea7d1a7 PD |
285 | break; |
286 | case BLKIF_OP_WRITE: | |
287 | case BLKIF_OP_FLUSH_DISKCACHE: | |
4ea7d1a7 PD |
288 | default: |
289 | break; | |
290 | } | |
291 | ||
e7f5b5f8 | 292 | request->status = request->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; |
4ea7d1a7 | 293 | |
e7f5b5f8 | 294 | switch (request->req.operation) { |
4ea7d1a7 PD |
295 | case BLKIF_OP_WRITE: |
296 | case BLKIF_OP_FLUSH_DISKCACHE: | |
e7f5b5f8 | 297 | if (!request->req.nr_segments) { |
4ea7d1a7 PD |
298 | break; |
299 | } | |
e02d9494 | 300 | /* fall through */ |
4ea7d1a7 | 301 | case BLKIF_OP_READ: |
e7f5b5f8 PD |
302 | if (request->status == BLKIF_RSP_OKAY) { |
303 | block_acct_done(blk_get_stats(dataplane->blk), &request->acct); | |
4ea7d1a7 | 304 | } else { |
e7f5b5f8 | 305 | block_acct_failed(blk_get_stats(dataplane->blk), &request->acct); |
4ea7d1a7 PD |
306 | } |
307 | break; | |
308 | case BLKIF_OP_DISCARD: | |
309 | default: | |
310 | break; | |
311 | } | |
bfd0d636 | 312 | |
36d883ba | 313 | xen_block_complete_request(request); |
bfd0d636 | 314 | |
345f42b4 PD |
315 | if (dataplane->more_work) { |
316 | qemu_bh_schedule(dataplane->bh); | |
317 | } | |
4ea7d1a7 PD |
318 | |
319 | done: | |
f3b604e3 | 320 | aio_context_release(dataplane->ctx); |
4ea7d1a7 PD |
321 | } |
322 | ||
d4683cf9 PD |
323 | static bool xen_block_split_discard(XenBlockRequest *request, |
324 | blkif_sector_t sector_number, | |
325 | uint64_t nr_sectors) | |
4ea7d1a7 | 326 | { |
e7f5b5f8 | 327 | XenBlockDataPlane *dataplane = request->dataplane; |
4ea7d1a7 PD |
328 | int64_t byte_offset; |
329 | int byte_chunk; | |
2bcd05cf | 330 | uint64_t byte_remaining; |
4ea7d1a7 PD |
331 | uint64_t sec_start = sector_number; |
332 | uint64_t sec_count = nr_sectors; | |
333 | ||
334 | /* Wrap around, or overflowing byte limit? */ | |
335 | if (sec_start + sec_count < sec_count || | |
5feeb718 | 336 | sec_start + sec_count > INT64_MAX / dataplane->sector_size) { |
4ea7d1a7 PD |
337 | return false; |
338 | } | |
339 | ||
5feeb718 PD |
340 | byte_offset = sec_start * dataplane->sector_size; |
341 | byte_remaining = sec_count * dataplane->sector_size; | |
4ea7d1a7 PD |
342 | |
343 | do { | |
2bcd05cf PD |
344 | byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ? |
345 | BDRV_REQUEST_MAX_BYTES : byte_remaining; | |
e7f5b5f8 | 346 | request->aio_inflight++; |
f3b604e3 | 347 | blk_aio_pdiscard(dataplane->blk, byte_offset, byte_chunk, |
d4683cf9 | 348 | xen_block_complete_aio, request); |
4ea7d1a7 PD |
349 | byte_remaining -= byte_chunk; |
350 | byte_offset += byte_chunk; | |
351 | } while (byte_remaining > 0); | |
352 | ||
353 | return true; | |
354 | } | |
355 | ||
d4683cf9 | 356 | static int xen_block_do_aio(XenBlockRequest *request) |
4ea7d1a7 | 357 | { |
e7f5b5f8 PD |
358 | XenBlockDataPlane *dataplane = request->dataplane; |
359 | ||
e7f5b5f8 PD |
360 | if (request->req.nr_segments && |
361 | (request->req.operation == BLKIF_OP_WRITE || | |
362 | request->req.operation == BLKIF_OP_FLUSH_DISKCACHE) && | |
d4683cf9 | 363 | xen_block_copy_request(request)) { |
4ea7d1a7 PD |
364 | goto err; |
365 | } | |
366 | ||
e7f5b5f8 PD |
367 | request->aio_inflight++; |
368 | if (request->presync) { | |
d4683cf9 PD |
369 | blk_aio_flush(request->dataplane->blk, xen_block_complete_aio, |
370 | request); | |
4ea7d1a7 PD |
371 | return 0; |
372 | } | |
373 | ||
e7f5b5f8 | 374 | switch (request->req.operation) { |
4ea7d1a7 | 375 | case BLKIF_OP_READ: |
e7f5b5f8 PD |
376 | qemu_iovec_add(&request->v, request->buf, request->size); |
377 | block_acct_start(blk_get_stats(dataplane->blk), &request->acct, | |
378 | request->v.size, BLOCK_ACCT_READ); | |
379 | request->aio_inflight++; | |
380 | blk_aio_preadv(dataplane->blk, request->start, &request->v, 0, | |
d4683cf9 | 381 | xen_block_complete_aio, request); |
4ea7d1a7 PD |
382 | break; |
383 | case BLKIF_OP_WRITE: | |
384 | case BLKIF_OP_FLUSH_DISKCACHE: | |
e7f5b5f8 | 385 | if (!request->req.nr_segments) { |
4ea7d1a7 PD |
386 | break; |
387 | } | |
388 | ||
e7f5b5f8 PD |
389 | qemu_iovec_add(&request->v, request->buf, request->size); |
390 | block_acct_start(blk_get_stats(dataplane->blk), &request->acct, | |
391 | request->v.size, | |
392 | request->req.operation == BLKIF_OP_WRITE ? | |
4ea7d1a7 | 393 | BLOCK_ACCT_WRITE : BLOCK_ACCT_FLUSH); |
e7f5b5f8 PD |
394 | request->aio_inflight++; |
395 | blk_aio_pwritev(dataplane->blk, request->start, &request->v, 0, | |
d4683cf9 | 396 | xen_block_complete_aio, request); |
4ea7d1a7 PD |
397 | break; |
398 | case BLKIF_OP_DISCARD: | |
399 | { | |
e7f5b5f8 | 400 | struct blkif_request_discard *req = (void *)&request->req; |
d4683cf9 PD |
401 | if (!xen_block_split_discard(request, req->sector_number, |
402 | req->nr_sectors)) { | |
4ea7d1a7 PD |
403 | goto err; |
404 | } | |
405 | break; | |
406 | } | |
407 | default: | |
408 | /* unknown operation (shouldn't happen -- parse catches this) */ | |
409 | goto err; | |
410 | } | |
411 | ||
d4683cf9 | 412 | xen_block_complete_aio(request, 0); |
4ea7d1a7 PD |
413 | |
414 | return 0; | |
415 | ||
416 | err: | |
e7f5b5f8 | 417 | request->status = BLKIF_RSP_ERROR; |
36d883ba | 418 | xen_block_complete_request(request); |
4ea7d1a7 PD |
419 | return -1; |
420 | } | |
421 | ||
bfd0d636 | 422 | static int xen_block_send_response(XenBlockRequest *request) |
4ea7d1a7 | 423 | { |
e7f5b5f8 | 424 | XenBlockDataPlane *dataplane = request->dataplane; |
fcab2b46 PD |
425 | int send_notify = 0; |
426 | int have_requests = 0; | |
427 | blkif_response_t *resp; | |
4ea7d1a7 PD |
428 | |
429 | /* Place on the response ring for the relevant domain. */ | |
f3b604e3 | 430 | switch (dataplane->protocol) { |
4ea7d1a7 PD |
431 | case BLKIF_PROTOCOL_NATIVE: |
432 | resp = (blkif_response_t *)RING_GET_RESPONSE( | |
f3b604e3 PD |
433 | &dataplane->rings.native, |
434 | dataplane->rings.native.rsp_prod_pvt); | |
4ea7d1a7 PD |
435 | break; |
436 | case BLKIF_PROTOCOL_X86_32: | |
437 | resp = (blkif_response_t *)RING_GET_RESPONSE( | |
f3b604e3 PD |
438 | &dataplane->rings.x86_32_part, |
439 | dataplane->rings.x86_32_part.rsp_prod_pvt); | |
4ea7d1a7 PD |
440 | break; |
441 | case BLKIF_PROTOCOL_X86_64: | |
442 | resp = (blkif_response_t *)RING_GET_RESPONSE( | |
f3b604e3 PD |
443 | &dataplane->rings.x86_64_part, |
444 | dataplane->rings.x86_64_part.rsp_prod_pvt); | |
4ea7d1a7 PD |
445 | break; |
446 | default: | |
447 | return 0; | |
448 | } | |
449 | ||
e7f5b5f8 PD |
450 | resp->id = request->req.id; |
451 | resp->operation = request->req.operation; | |
452 | resp->status = request->status; | |
4ea7d1a7 | 453 | |
f3b604e3 | 454 | dataplane->rings.common.rsp_prod_pvt++; |
4ea7d1a7 | 455 | |
f3b604e3 PD |
456 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&dataplane->rings.common, |
457 | send_notify); | |
458 | if (dataplane->rings.common.rsp_prod_pvt == | |
459 | dataplane->rings.common.req_cons) { | |
4ea7d1a7 PD |
460 | /* |
461 | * Tail check for pending requests. Allows frontend to avoid | |
462 | * notifications if requests are already in flight (lower | |
463 | * overheads and promotes batching). | |
464 | */ | |
f3b604e3 PD |
465 | RING_FINAL_CHECK_FOR_REQUESTS(&dataplane->rings.common, |
466 | have_requests); | |
467 | } else if (RING_HAS_UNCONSUMED_REQUESTS(&dataplane->rings.common)) { | |
4ea7d1a7 PD |
468 | have_requests = 1; |
469 | } | |
470 | ||
471 | if (have_requests) { | |
f3b604e3 | 472 | dataplane->more_work++; |
4ea7d1a7 PD |
473 | } |
474 | return send_notify; | |
475 | } | |
476 | ||
d4683cf9 PD |
477 | static int xen_block_get_request(XenBlockDataPlane *dataplane, |
478 | XenBlockRequest *request, RING_IDX rc) | |
4ea7d1a7 | 479 | { |
f3b604e3 PD |
480 | switch (dataplane->protocol) { |
481 | case BLKIF_PROTOCOL_NATIVE: { | |
482 | blkif_request_t *req = | |
483 | RING_GET_REQUEST(&dataplane->rings.native, rc); | |
484 | ||
e7f5b5f8 | 485 | memcpy(&request->req, req, sizeof(request->req)); |
4ea7d1a7 | 486 | break; |
f3b604e3 PD |
487 | } |
488 | case BLKIF_PROTOCOL_X86_32: { | |
489 | blkif_x86_32_request_t *req = | |
490 | RING_GET_REQUEST(&dataplane->rings.x86_32_part, rc); | |
491 | ||
e7f5b5f8 | 492 | blkif_get_x86_32_req(&request->req, req); |
4ea7d1a7 | 493 | break; |
f3b604e3 PD |
494 | } |
495 | case BLKIF_PROTOCOL_X86_64: { | |
496 | blkif_x86_64_request_t *req = | |
497 | RING_GET_REQUEST(&dataplane->rings.x86_64_part, rc); | |
498 | ||
e7f5b5f8 | 499 | blkif_get_x86_64_req(&request->req, req); |
4ea7d1a7 PD |
500 | break; |
501 | } | |
f3b604e3 | 502 | } |
4ea7d1a7 PD |
503 | /* Prevent the compiler from accessing the on-ring fields instead. */ |
504 | barrier(); | |
505 | return 0; | |
506 | } | |
507 | ||
6de45f91 TS |
508 | /* |
509 | * Threshold of in-flight requests above which we will start using | |
510 | * blk_io_plug()/blk_io_unplug() to batch requests. | |
511 | */ | |
512 | #define IO_PLUG_THRESHOLD 1 | |
513 | ||
345f42b4 | 514 | static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) |
4ea7d1a7 PD |
515 | { |
516 | RING_IDX rc, rp; | |
e7f5b5f8 | 517 | XenBlockRequest *request; |
6de45f91 TS |
518 | int inflight_atstart = dataplane->requests_inflight; |
519 | int batched = 0; | |
345f42b4 | 520 | bool done_something = false; |
4ea7d1a7 | 521 | |
f3b604e3 | 522 | dataplane->more_work = 0; |
4ea7d1a7 | 523 | |
f3b604e3 PD |
524 | rc = dataplane->rings.common.req_cons; |
525 | rp = dataplane->rings.common.sring->req_prod; | |
4ea7d1a7 PD |
526 | xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ |
527 | ||
6de45f91 TS |
528 | /* |
529 | * If there was more than IO_PLUG_THRESHOLD requests in flight | |
530 | * when we got here, this is an indication that there the bottleneck | |
531 | * is below us, so it's worth beginning to batch up I/O requests | |
532 | * rather than submitting them immediately. The maximum number | |
533 | * of requests we're willing to batch is the number already in | |
534 | * flight, so it can grow up to max_requests when the bottleneck | |
535 | * is below us. | |
536 | */ | |
537 | if (inflight_atstart > IO_PLUG_THRESHOLD) { | |
538 | blk_io_plug(dataplane->blk); | |
539 | } | |
4ea7d1a7 PD |
540 | while (rc != rp) { |
541 | /* pull request from ring */ | |
f3b604e3 | 542 | if (RING_REQUEST_CONS_OVERFLOW(&dataplane->rings.common, rc)) { |
4ea7d1a7 PD |
543 | break; |
544 | } | |
d4683cf9 | 545 | request = xen_block_start_request(dataplane); |
e7f5b5f8 | 546 | if (request == NULL) { |
f3b604e3 | 547 | dataplane->more_work++; |
4ea7d1a7 PD |
548 | break; |
549 | } | |
d4683cf9 | 550 | xen_block_get_request(dataplane, request, rc); |
f3b604e3 | 551 | dataplane->rings.common.req_cons = ++rc; |
345f42b4 | 552 | done_something = true; |
4ea7d1a7 PD |
553 | |
554 | /* parse them */ | |
d4683cf9 | 555 | if (xen_block_parse_request(request) != 0) { |
e7f5b5f8 | 556 | switch (request->req.operation) { |
4ea7d1a7 | 557 | case BLKIF_OP_READ: |
f3b604e3 | 558 | block_acct_invalid(blk_get_stats(dataplane->blk), |
4ea7d1a7 PD |
559 | BLOCK_ACCT_READ); |
560 | break; | |
561 | case BLKIF_OP_WRITE: | |
f3b604e3 | 562 | block_acct_invalid(blk_get_stats(dataplane->blk), |
4ea7d1a7 PD |
563 | BLOCK_ACCT_WRITE); |
564 | break; | |
565 | case BLKIF_OP_FLUSH_DISKCACHE: | |
f3b604e3 | 566 | block_acct_invalid(blk_get_stats(dataplane->blk), |
4ea7d1a7 PD |
567 | BLOCK_ACCT_FLUSH); |
568 | default: | |
569 | break; | |
570 | }; | |
571 | ||
36d883ba | 572 | xen_block_complete_request(request); |
4ea7d1a7 PD |
573 | continue; |
574 | } | |
575 | ||
6de45f91 TS |
576 | if (inflight_atstart > IO_PLUG_THRESHOLD && |
577 | batched >= inflight_atstart) { | |
578 | blk_io_unplug(dataplane->blk); | |
579 | } | |
d4683cf9 | 580 | xen_block_do_aio(request); |
6de45f91 TS |
581 | if (inflight_atstart > IO_PLUG_THRESHOLD) { |
582 | if (batched >= inflight_atstart) { | |
583 | blk_io_plug(dataplane->blk); | |
584 | batched = 0; | |
585 | } else { | |
586 | batched++; | |
587 | } | |
588 | } | |
589 | } | |
590 | if (inflight_atstart > IO_PLUG_THRESHOLD) { | |
591 | blk_io_unplug(dataplane->blk); | |
4ea7d1a7 PD |
592 | } |
593 | ||
345f42b4 | 594 | return done_something; |
4ea7d1a7 PD |
595 | } |
596 | ||
d4683cf9 | 597 | static void xen_block_dataplane_bh(void *opaque) |
4ea7d1a7 | 598 | { |
f3b604e3 | 599 | XenBlockDataPlane *dataplane = opaque; |
4ea7d1a7 | 600 | |
f3b604e3 | 601 | aio_context_acquire(dataplane->ctx); |
d4683cf9 | 602 | xen_block_handle_requests(dataplane); |
f3b604e3 | 603 | aio_context_release(dataplane->ctx); |
4ea7d1a7 PD |
604 | } |
605 | ||
345f42b4 | 606 | static bool xen_block_dataplane_event(void *opaque) |
fcab2b46 | 607 | { |
f3b604e3 | 608 | XenBlockDataPlane *dataplane = opaque; |
fcab2b46 | 609 | |
345f42b4 | 610 | return xen_block_handle_requests(dataplane); |
fcab2b46 PD |
611 | } |
612 | ||
f3b604e3 | 613 | XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, |
5feeb718 PD |
614 | BlockBackend *blk, |
615 | unsigned int sector_size, | |
f3b604e3 | 616 | IOThread *iothread) |
4ea7d1a7 | 617 | { |
f3b604e3 | 618 | XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1); |
4ea7d1a7 | 619 | |
f3b604e3 | 620 | dataplane->xendev = xendev; |
5feeb718 PD |
621 | dataplane->blk = blk; |
622 | dataplane->sector_size = sector_size; | |
4ea7d1a7 | 623 | |
f3b604e3 | 624 | QLIST_INIT(&dataplane->inflight); |
f3b604e3 | 625 | QLIST_INIT(&dataplane->freelist); |
4ea7d1a7 | 626 | |
fcab2b46 | 627 | if (iothread) { |
f3b604e3 PD |
628 | dataplane->iothread = iothread; |
629 | object_ref(OBJECT(dataplane->iothread)); | |
630 | dataplane->ctx = iothread_get_aio_context(dataplane->iothread); | |
fcab2b46 | 631 | } else { |
f3b604e3 | 632 | dataplane->ctx = qemu_get_aio_context(); |
fcab2b46 | 633 | } |
d4683cf9 PD |
634 | dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, |
635 | dataplane); | |
fcab2b46 | 636 | |
f3b604e3 | 637 | return dataplane; |
4ea7d1a7 PD |
638 | } |
639 | ||
f3b604e3 | 640 | void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane) |
4ea7d1a7 | 641 | { |
e7f5b5f8 | 642 | XenBlockRequest *request; |
4ea7d1a7 | 643 | |
f3b604e3 | 644 | if (!dataplane) { |
fcab2b46 PD |
645 | return; |
646 | } | |
4ea7d1a7 | 647 | |
f3b604e3 | 648 | while (!QLIST_EMPTY(&dataplane->freelist)) { |
e7f5b5f8 PD |
649 | request = QLIST_FIRST(&dataplane->freelist); |
650 | QLIST_REMOVE(request, list); | |
651 | qemu_iovec_destroy(&request->v); | |
c6025bd1 | 652 | qemu_vfree(request->buf); |
e7f5b5f8 | 653 | g_free(request); |
4ea7d1a7 PD |
654 | } |
655 | ||
f3b604e3 PD |
656 | qemu_bh_delete(dataplane->bh); |
657 | if (dataplane->iothread) { | |
658 | object_unref(OBJECT(dataplane->iothread)); | |
fcab2b46 PD |
659 | } |
660 | ||
f3b604e3 | 661 | g_free(dataplane); |
4ea7d1a7 PD |
662 | } |
663 | ||
f3b604e3 | 664 | void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) |
4ea7d1a7 | 665 | { |
fcab2b46 | 666 | XenDevice *xendev; |
4ea7d1a7 | 667 | |
f3b604e3 | 668 | if (!dataplane) { |
fcab2b46 PD |
669 | return; |
670 | } | |
671 | ||
32d0b7be PD |
672 | xendev = dataplane->xendev; |
673 | ||
f3b604e3 | 674 | aio_context_acquire(dataplane->ctx); |
32d0b7be PD |
675 | if (dataplane->event_channel) { |
676 | /* Only reason for failure is a NULL channel */ | |
677 | xen_device_set_event_channel_context(xendev, dataplane->event_channel, | |
678 | qemu_get_aio_context(), | |
679 | &error_abort); | |
680 | } | |
97896a48 KW |
681 | /* Xen doesn't have multiple users for nodes, so this can't fail */ |
682 | blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); | |
f3b604e3 | 683 | aio_context_release(dataplane->ctx); |
fcab2b46 | 684 | |
32d0b7be PD |
685 | /* |
686 | * Now that the context has been moved onto the main thread, cancel | |
687 | * further processing. | |
688 | */ | |
689 | qemu_bh_cancel(dataplane->bh); | |
fcab2b46 | 690 | |
f3b604e3 | 691 | if (dataplane->event_channel) { |
fcab2b46 PD |
692 | Error *local_err = NULL; |
693 | ||
f3b604e3 | 694 | xen_device_unbind_event_channel(xendev, dataplane->event_channel, |
fcab2b46 | 695 | &local_err); |
f3b604e3 | 696 | dataplane->event_channel = NULL; |
fcab2b46 PD |
697 | |
698 | if (local_err) { | |
699 | error_report_err(local_err); | |
700 | } | |
701 | } | |
702 | ||
f3b604e3 | 703 | if (dataplane->sring) { |
fcab2b46 PD |
704 | Error *local_err = NULL; |
705 | ||
f3b604e3 PD |
706 | xen_device_unmap_grant_refs(xendev, dataplane->sring, |
707 | dataplane->nr_ring_ref, &local_err); | |
708 | dataplane->sring = NULL; | |
fcab2b46 PD |
709 | |
710 | if (local_err) { | |
711 | error_report_err(local_err); | |
712 | } | |
713 | } | |
714 | ||
f3b604e3 PD |
715 | g_free(dataplane->ring_ref); |
716 | dataplane->ring_ref = NULL; | |
fcab2b46 PD |
717 | } |
718 | ||
f3b604e3 | 719 | void xen_block_dataplane_start(XenBlockDataPlane *dataplane, |
fcab2b46 PD |
720 | const unsigned int ring_ref[], |
721 | unsigned int nr_ring_ref, | |
722 | unsigned int event_channel, | |
723 | unsigned int protocol, | |
724 | Error **errp) | |
725 | { | |
1de7096d | 726 | ERRP_GUARD(); |
f3b604e3 | 727 | XenDevice *xendev = dataplane->xendev; |
fcab2b46 PD |
728 | unsigned int ring_size; |
729 | unsigned int i; | |
730 | ||
f3b604e3 PD |
731 | dataplane->nr_ring_ref = nr_ring_ref; |
732 | dataplane->ring_ref = g_new(unsigned int, nr_ring_ref); | |
fcab2b46 PD |
733 | |
734 | for (i = 0; i < nr_ring_ref; i++) { | |
f3b604e3 | 735 | dataplane->ring_ref[i] = ring_ref[i]; |
fcab2b46 PD |
736 | } |
737 | ||
f3b604e3 | 738 | dataplane->protocol = protocol; |
fcab2b46 | 739 | |
f3b604e3 PD |
740 | ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref; |
741 | switch (dataplane->protocol) { | |
fcab2b46 PD |
742 | case BLKIF_PROTOCOL_NATIVE: |
743 | { | |
f3b604e3 | 744 | dataplane->max_requests = __CONST_RING_SIZE(blkif, ring_size); |
fcab2b46 PD |
745 | break; |
746 | } | |
747 | case BLKIF_PROTOCOL_X86_32: | |
748 | { | |
f3b604e3 | 749 | dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_32, ring_size); |
fcab2b46 PD |
750 | break; |
751 | } | |
752 | case BLKIF_PROTOCOL_X86_64: | |
753 | { | |
f3b604e3 | 754 | dataplane->max_requests = __CONST_RING_SIZE(blkif_x86_64, ring_size); |
fcab2b46 PD |
755 | break; |
756 | } | |
757 | default: | |
f3b604e3 | 758 | error_setg(errp, "unknown protocol %u", dataplane->protocol); |
fcab2b46 PD |
759 | return; |
760 | } | |
761 | ||
f3b604e3 | 762 | xen_device_set_max_grant_refs(xendev, dataplane->nr_ring_ref, |
1de7096d VSO |
763 | errp); |
764 | if (*errp) { | |
fcab2b46 PD |
765 | goto stop; |
766 | } | |
767 | ||
f3b604e3 PD |
768 | dataplane->sring = xen_device_map_grant_refs(xendev, |
769 | dataplane->ring_ref, | |
770 | dataplane->nr_ring_ref, | |
fcab2b46 | 771 | PROT_READ | PROT_WRITE, |
1de7096d VSO |
772 | errp); |
773 | if (*errp) { | |
fcab2b46 PD |
774 | goto stop; |
775 | } | |
776 | ||
f3b604e3 | 777 | switch (dataplane->protocol) { |
fcab2b46 PD |
778 | case BLKIF_PROTOCOL_NATIVE: |
779 | { | |
f3b604e3 | 780 | blkif_sring_t *sring_native = dataplane->sring; |
fcab2b46 | 781 | |
f3b604e3 | 782 | BACK_RING_INIT(&dataplane->rings.native, sring_native, ring_size); |
fcab2b46 PD |
783 | break; |
784 | } | |
785 | case BLKIF_PROTOCOL_X86_32: | |
786 | { | |
f3b604e3 | 787 | blkif_x86_32_sring_t *sring_x86_32 = dataplane->sring; |
fcab2b46 | 788 | |
f3b604e3 | 789 | BACK_RING_INIT(&dataplane->rings.x86_32_part, sring_x86_32, |
fcab2b46 PD |
790 | ring_size); |
791 | break; | |
792 | } | |
793 | case BLKIF_PROTOCOL_X86_64: | |
794 | { | |
f3b604e3 | 795 | blkif_x86_64_sring_t *sring_x86_64 = dataplane->sring; |
fcab2b46 | 796 | |
f3b604e3 | 797 | BACK_RING_INIT(&dataplane->rings.x86_64_part, sring_x86_64, |
fcab2b46 PD |
798 | ring_size); |
799 | break; | |
800 | } | |
801 | } | |
802 | ||
f3b604e3 | 803 | dataplane->event_channel = |
32d0b7be | 804 | xen_device_bind_event_channel(xendev, event_channel, |
d4683cf9 | 805 | xen_block_dataplane_event, dataplane, |
1de7096d VSO |
806 | errp); |
807 | if (*errp) { | |
fcab2b46 PD |
808 | goto stop; |
809 | } | |
810 | ||
f3b604e3 | 811 | aio_context_acquire(dataplane->ctx); |
97896a48 KW |
812 | /* If other users keep the BlockBackend in the iothread, that's ok */ |
813 | blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); | |
32d0b7be PD |
814 | /* Only reason for failure is a NULL channel */ |
815 | xen_device_set_event_channel_context(xendev, dataplane->event_channel, | |
816 | dataplane->ctx, &error_abort); | |
f3b604e3 | 817 | aio_context_release(dataplane->ctx); |
32d0b7be | 818 | |
fcab2b46 PD |
819 | return; |
820 | ||
821 | stop: | |
f3b604e3 | 822 | xen_block_dataplane_stop(dataplane); |
4ea7d1a7 | 823 | } |