/*
 *  xen paravirt block device backend
 *
 *  (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <xs.h>
#include <xenctrl.h>
#include <xen/io/xenbus.h>

#include "hw.h"
#include "block_int.h"
#include "qemu-char.h"
#include "xen_blkif.h"
#include "xen_backend.h"
#include "blockdev.h"

/* ------------------------------------------------------------- */

static int syncwrite    = 0;
static int batch_maps   = 0;

static int max_requests = 32;
static int use_aio      = 1;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    QLIST_ENTRY(ioreq)   list;
};

struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
    QLIST_HEAD(finished_head, ioreq) finished;
    QLIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */

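/*
 * Get a free ioreq: reuse one from the freelist if possible, otherwise
 * allocate a new one (bounded by max_requests).  The request is moved
 * onto the inflight list; returns NULL if the limit has been reached.
 */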
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
        if (blkdev->requests_total >= max_requests) {
            goto out;
        }
        /* allocate new struct */
        ioreq = qemu_mallocz(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
        blkdev->requests_total++;
        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

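/* move a completed request from the inflight to the finished list */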
static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

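/* return a finished request to the freelist, wiped clean for reuse */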
static void ioreq_release(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    blkdev->requests_finished--;
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
                  ioreq->req.operation, ioreq->req.nr_segments,
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
        if (!syncwrite) {
            ioreq->presync = ioreq->postsync = 1;
        }
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
        if (syncwrite) {
            ioreq->postsync = 1;
        }
        break;
    default:
        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
                      ioreq->req.operation);
        goto err;
    }

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
            goto err;
        }
        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
            goto err;
        }
        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
            goto err;
        }

        ioreq->domids[i] = blkdev->xendev.dom;
        ioreq->refs[i]   = ioreq->req.seg[i].gref;

        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void *)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

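/* undo the grant mappings set up by ioreq_map() */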
static void ioreq_unmap(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0) {
        return;
    }
    if (batch_maps) {
        if (!ioreq->pages) {
            return;
        }
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) {
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        }
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            if (!ioreq->page[i]) {
                continue;
            }
            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) {
                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                              strerror(errno));
            }
            ioreq->blkdev->cnt_map--;
            ioreq->page[i] = NULL;
        }
    }
}

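/*
 * Map the frontend's grant references into our address space, either
 * all at once (batch_maps) or one page at a time.  On success the iovec
 * entries are fixed up to point at the mapped pages.
 */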
static int ioreq_map(struct ioreq *ioreq)
{
    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0) {
        return 0;
    }
    if (batch_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        }
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
    }
    return 0;
}

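/* execute a request synchronously via bdrv_read/bdrv_write */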
static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    int i, rc, len = 0;
    off_t pos;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }
    if (ioreq->presync) {
        bdrv_flush(blkdev->bs);
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
                           ioreq->v.iov[i].iov_base,
                           ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            len += ioreq->v.iov[i].iov_len;
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
                            ioreq->v.iov[i].iov_base,
                            ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            len += ioreq->v.iov[i].iov_len;
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync) {
        bdrv_flush(blkdev->bs);
    }
    ioreq->status = BLKIF_RSP_OKAY;

    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

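/*
 * aio completion callback: the last completion (aio_inflight drops to
 * zero) sets the response status, tears down the grant mappings and
 * kicks the bottom half to send responses.
 */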
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->aio_inflight > 0) {
        return;
    }

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

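/*
 * Submit a request asynchronously.  aio_inflight starts at one so the
 * final qemu_aio_complete(ioreq, 0) below balances it; the request only
 * completes once all submitted aio operations have finished.
 */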
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
        goto err_no_map;
    }

    ioreq->aio_inflight++;
    if (ioreq->presync) {
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    }

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            break;
        }
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync) {
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    }
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq_unmap(ioreq);
err_no_map:
    ioreq_finish(ioreq);
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

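/*
 * Put the response for one request on the shared ring and check whether
 * the frontend needs an event channel notification.  Returns nonzero if
 * a notify should be sent.
 */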
static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
    blkif_response_t  resp;
    void              *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
                                blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
                                blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        /* unreachable: blk_connect() only sets the three protocols above;
         * bail out instead of memcpy'ing to a NULL dst */
        return 0;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
        have_requests = 1;
    }

    if (have_requests) {
        blkdev->more_work++;
    }
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!QLIST_EMPTY(&blkdev->finished)) {
        ioreq = QLIST_FIRST(&blkdev->finished);
        send_notify += blk_send_response_one(ioreq);
        ioreq_release(ioreq);
    }
    if (send_notify) {
        xen_be_send_notify(&blkdev->xendev);
    }
}

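/* copy one request off the ring, converting from 32/64-bit layouts as needed */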
static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
               sizeof(ioreq->req));
        break;
    case BLKIF_PROTOCOL_X86_32:
        blkif_get_x86_32_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
        break;
    case BLKIF_PROTOCOL_X86_64:
        blkif_get_x86_64_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
        break;
    }
    return 0;
}

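/*
 * Main request loop: consume requests from the ring, parse and submit
 * them, and reschedule the bottom half if work is left over (more ring
 * entries, or requests deferred because max_requests was reached).
 */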
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (use_aio) {
        blk_send_response_all(blkdev);
    }
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
            break;
        }
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq)) {
                xen_be_send_notify(&blkdev->xendev);
            }
            ioreq_release(ioreq);
            continue;
        }

        if (use_aio) {
            /* run i/o in aio mode */
            ioreq_runio_qemu_aio(ioreq);
        } else {
            /* run i/o in sync mode */
            ioreq_runio_qemu_sync(ioreq);
        }
    }
    if (!use_aio) {
        blk_send_response_all(blkdev);
    }

    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
        qemu_bh_schedule(blkdev->bh);
    }
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

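/* per-device setup when the backend device is created */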
static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    QLIST_INIT(&blkdev->inflight);
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE) {
        batch_maps = 1;
    }
}

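/*
 * Read the backend configuration from xenstore, open (or look up) the
 * qemu block device and publish disk geometry and features back to
 * xenstore so the frontend can connect.
 */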
static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int index, qflags, have_barriers, info = 0;
    char *h = NULL;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        if (blkdev->params != NULL) {
            h = strchr(blkdev->params, ':');
        }
        if (h != NULL) {
            blkdev->fileproto = blkdev->params;
            blkdev->filename  = h+1;
            *h = 0;
        } else {
            blkdev->fileproto = "<unset>";
            blkdev->filename  = blkdev->params;
        }
    }
    if (blkdev->mode == NULL) {
        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    }
    if (blkdev->type == NULL) {
        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    }
    if (blkdev->dev == NULL) {
        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    }
    if (blkdev->devtype == NULL) {
        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
    }

    /* do we have all we need? */
    if (blkdev->params == NULL ||
        blkdev->mode == NULL   ||
        blkdev->type == NULL   ||
        blkdev->dev == NULL) {
        return -1;
    }

    /* read-only ? */
    if (strcmp(blkdev->mode, "w") == 0) {
        qflags = BDRV_O_RDWR;
    } else {
        qflags = 0;
        info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
        info  |= VDISK_CDROM;
    }

    /* init qemu block driver */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
    if (!blkdev->dinfo) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
        blkdev->bs = bdrv_new(blkdev->dev);
        if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
                      bdrv_find_whitelisted_format(blkdev->fileproto)) != 0) {
            bdrv_delete(blkdev->bs);
            return -1;
        }
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
        blkdev->bs = blkdev->dinfo->bdrv;
    }
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      blkdev->bs->drv ? blkdev->bs->drv->format_name : "-");
        blkdev->file_size = 0;
    }
    have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0;

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                  " size %" PRId64 " (%" PRId64 " MB)\n",
                  blkdev->type, blkdev->fileproto, blkdev->filename,
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers);
    xenstore_write_be_int(&blkdev->xendev, "info", info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
                          blkdev->file_size / blkdev->file_blk);
    return 0;
}

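/*
 * Connect to the frontend: read ring-ref and event-channel from
 * xenstore, map the shared ring and bind the event channel.
 */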
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) {
        return -1;
    }
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1) {
        return -1;
    }

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        }
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
        }
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                                            blkdev->xendev.dom,
                                            blkdev->ring_ref,
                                            PROT_READ | PROT_WRITE);
    if (!blkdev->sring) {
        return -1;
    }
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
        break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                  "remote port %d, local port %d\n",
                  blkdev->xendev.protocol, blkdev->ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

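/* undo blk_connect(): close the image, unbind the event channel, unmap the ring */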
static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (!blkdev->dinfo) {
            /* close/delete only if we created it ourself */
            bdrv_close(blkdev->bs);
            bdrv_delete(blkdev->bs);
        }
        blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
        blkdev->cnt_map--;
        blkdev->sring = NULL;
    }
}

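/* free per-device resources: pooled ioreqs, xenstore strings, bottom half */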
static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
        qemu_free(ioreq);
    }

    qemu_free(blkdev->params);
    qemu_free(blkdev->mode);
    qemu_free(blkdev->type);
    qemu_free(blkdev->dev);
    qemu_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .connect    = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};