Commit | Line | Data |
---|---|---|
a6b1d5d0 AD |
1 | From patchwork Thu Aug 13 08:41:50 2015 |
2 | Content-Type: text/plain; charset="utf-8" | |
3 | MIME-Version: 1.0 | |
4 | Content-Transfer-Encoding: 7bit | |
5 | Subject: mirror: Fix coroutine reentrance | |
6 | From: Kevin Wolf <kwolf@redhat.com> | |
7 | X-Patchwork-Id: 506888 | |
8 | Message-Id: <1439455310-11263-1-git-send-email-kwolf@redhat.com> | |
9 | To: qemu-block@nongnu.org | |
10 | Cc: kwolf@redhat.com, famz@redhat.com, jcody@redhat.com, | |
11 | qemu-stable@nongnu.org, | |
12 | qemu-devel@nongnu.org, stefanha@redhat.com, pbonzini@redhat.com | |
13 | Date: Thu, 13 Aug 2015 10:41:50 +0200 | |
14 | ||
15 | This fixes a regression introduced by commit dcfb3beb ("mirror: Do zero | |
16 | write on target if sectors not allocated"), which was reported to cause | |
17 | aborts with the message "Co-routine re-entered recursively". | |
18 | ||
19 | The cause for this bug is the following code in mirror_iteration_done(): | |
20 | ||
21 | if (s->common.busy) { | |
22 | qemu_coroutine_enter(s->common.co, NULL); | |
23 | } | |
24 | ||
25 | This has always been ugly because - unlike most places that reenter - it | |
26 | doesn't have a specific yield that it pairs with, but is more | |
27 | uncontrolled. What we really mean here is "reenter the coroutine if | |
28 | it's in one of the four explicit yields in mirror.c". | |
29 | ||
30 | This used to be equivalent to s->common.busy because neither | |
31 | mirror_run() nor mirror_iteration() call any function that could yield. | |
32 | However since commit dcfb3beb this doesn't hold true any more: | |
33 | bdrv_get_block_status_above() can yield. | |
34 | ||
35 | So what happens is that bdrv_get_block_status_above() wants to take a | |
36 | lock that is already held, so it adds itself to the queue of waiting | |
37 | coroutines and yields. Instead of being woken up by the unlock function, | |
38 | however, it gets woken up by mirror_iteration_done(), which is obviously | |
39 | wrong. | |
40 | ||
41 | In most cases the code actually happens to cope fairly well with such | |
42 | cases, but in this specific case, the unlock must already have scheduled | |
43 | the coroutine for wakeup when mirror_iteration_done() reentered it. And | |
44 | then the coroutine happened to process the scheduled restarts and tried | |
45 | to reenter itself recursively. | |
46 | ||
47 | This patch fixes the problem by pairing the reenter in | |
48 | mirror_iteration_done() with specific yields instead of abusing | |
49 | s->common.busy. | |
50 | ||
51 | Cc: qemu-stable@nongnu.org | |
52 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | |
53 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | |
54 | --- | |
55 | block/mirror.c | 15 ++++++++++----- | |
56 | 1 file changed, 10 insertions(+), 5 deletions(-) | |
57 | ||
58 | diff --git a/block/mirror.c b/block/mirror.c | |
59 | index fc4d8f5..b2fb4b9 100644 | |
60 | --- a/block/mirror.c | |
61 | +++ b/block/mirror.c | |
62 | @@ -60,6 +60,7 @@ typedef struct MirrorBlockJob { | |
63 | int sectors_in_flight; | |
64 | int ret; | |
65 | bool unmap; | |
66 | + bool waiting_for_io; | |
67 | } MirrorBlockJob; | |
68 | ||
69 | typedef struct MirrorOp { | |
70 | @@ -114,11 +115,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) | |
71 | qemu_iovec_destroy(&op->qiov); | |
72 | g_slice_free(MirrorOp, op); | |
73 | ||
74 | - /* Enter coroutine when it is not sleeping. The coroutine sleeps to | |
75 | - * rate-limit itself. The coroutine will eventually resume since there is | |
76 | - * a sleep timeout so don't wake it early. | |
77 | - */ | |
78 | - if (s->common.busy) { | |
79 | + if (s->waiting_for_io) { | |
80 | qemu_coroutine_enter(s->common.co, NULL); | |
81 | } | |
82 | } | |
83 | @@ -203,7 +200,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | |
84 | /* Wait for I/O to this cluster (from a previous iteration) to be done. */ | |
85 | while (test_bit(next_chunk, s->in_flight_bitmap)) { | |
86 | trace_mirror_yield_in_flight(s, sector_num, s->in_flight); | |
87 | + s->waiting_for_io = true; | |
88 | qemu_coroutine_yield(); | |
89 | + s->waiting_for_io = false; | |
90 | } | |
91 | ||
92 | do { | |
93 | @@ -239,7 +238,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) | |
94 | */ | |
95 | while (nb_chunks == 0 && s->buf_free_count < added_chunks) { | |
96 | trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight); | |
97 | + s->waiting_for_io = true; | |
98 | qemu_coroutine_yield(); | |
99 | + s->waiting_for_io = false; | |
100 | } | |
101 | if (s->buf_free_count < nb_chunks + added_chunks) { | |
102 | trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight); | |
103 | @@ -333,7 +334,9 @@ static void mirror_free_init(MirrorBlockJob *s) | |
104 | static void mirror_drain(MirrorBlockJob *s) | |
105 | { | |
106 | while (s->in_flight > 0) { | |
107 | + s->waiting_for_io = true; | |
108 | qemu_coroutine_yield(); | |
109 | + s->waiting_for_io = false; | |
110 | } | |
111 | } | |
112 | ||
113 | @@ -506,7 +509,9 @@ static void coroutine_fn mirror_run(void *opaque) | |
114 | if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || | |
115 | (cnt == 0 && s->in_flight > 0)) { | |
116 | trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); | |
117 | + s->waiting_for_io = true; | |
118 | qemu_coroutine_yield(); | |
119 | + s->waiting_for_io = false; | |
120 | continue; | |
121 | } else if (cnt != 0) { | |
122 | delay_ns = mirror_iteration(s); |