]>
Commit | Line | Data |
---|---|---|
aead9dc9 PB |
1 | /* |
2 | * Graph lock: rwlock to protect block layer graph manipulations (add/remove | |
3 | * edges and nodes) | |
4 | * | |
5 | * Copyright (c) 2022 Red Hat | |
6 | * | |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Lesser General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2.1 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
19 | */ | |
20 | ||
21 | #include "qemu/osdep.h" | |
22 | #include "qemu/main-loop.h" | |
23 | #include "block/graph-lock.h" | |
24 | #include "block/block.h" | |
25 | #include "block/block_int.h" | |
26 | ||
/* Dummy lock object to use for Thread Safety Analysis (TSA) */
BdrvGraphLock graph_lock;

/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;

/*
 * 1 while an exclusive writer holds the graph lock, 0 otherwise.
 * Written and read with atomic operations.
 */
static int has_writer;

/*
 * A reader coroutine could move from an AioContext to another.
 * If this happens, there is no problem from the point of view of
 * counters. The problem is that the total count becomes
 * unbalanced if one of the two AioContexts gets deleted.
 * The count of readers must remain correct, so the AioContext's
 * balance is transferred to this global variable.
 * Protected by aio_context_list_lock.
 */
static uint32_t orphaned_reader_count;

/* Queue of readers waiting for the writer to finish */
static CoQueue reader_queue;
49 | ||
struct BdrvGraphRWlock {
    /* How many readers are currently reading the graph. */
    uint32_t reader_count;

    /*
     * List of BdrvGraphRWlock kept in graph-lock.c
     * Protected by aio_context_list_lock
     */
    QTAILQ_ENTRY(BdrvGraphRWlock) next_aio;
};

/*
 * List of BdrvGraphRWlock.  This list ensures that each BdrvGraphRWlock
 * can safely modify only its own counter, avoiding reads/writes of the
 * others and thus improving performance by avoiding cacheline bounces.
 */
static QTAILQ_HEAD(, BdrvGraphRWlock) aio_context_list =
    QTAILQ_HEAD_INITIALIZER(aio_context_list);
68 | ||
69 | static void __attribute__((__constructor__)) bdrv_init_graph_lock(void) | |
70 | { | |
71 | qemu_mutex_init(&aio_context_list_lock); | |
72 | qemu_co_queue_init(&reader_queue); | |
73 | } | |
74 | ||
75 | void register_aiocontext(AioContext *ctx) | |
76 | { | |
77 | ctx->bdrv_graph = g_new0(BdrvGraphRWlock, 1); | |
78 | QEMU_LOCK_GUARD(&aio_context_list_lock); | |
79 | assert(ctx->bdrv_graph->reader_count == 0); | |
80 | QTAILQ_INSERT_TAIL(&aio_context_list, ctx->bdrv_graph, next_aio); | |
81 | } | |
82 | ||
83 | void unregister_aiocontext(AioContext *ctx) | |
84 | { | |
85 | QEMU_LOCK_GUARD(&aio_context_list_lock); | |
86 | orphaned_reader_count += ctx->bdrv_graph->reader_count; | |
87 | QTAILQ_REMOVE(&aio_context_list, ctx->bdrv_graph, next_aio); | |
88 | g_free(ctx->bdrv_graph); | |
89 | } | |
90 | ||
91 | static uint32_t reader_count(void) | |
92 | { | |
93 | BdrvGraphRWlock *brdv_graph; | |
94 | uint32_t rd; | |
95 | ||
96 | QEMU_LOCK_GUARD(&aio_context_list_lock); | |
97 | ||
3202d8e4 | 98 | /* rd can temporarily be negative, but the total will *always* be >= 0 */ |
aead9dc9 PB |
99 | rd = orphaned_reader_count; |
100 | QTAILQ_FOREACH(brdv_graph, &aio_context_list, next_aio) { | |
101 | rd += qatomic_read(&brdv_graph->reader_count); | |
102 | } | |
103 | ||
104 | /* shouldn't overflow unless there are 2^31 readers */ | |
105 | assert((int32_t)rd >= 0); | |
106 | return rd; | |
107 | } | |
108 | ||
/*
 * Take the graph lock exclusively (writer side).  Must run in the main
 * loop, outside of a coroutine.  Drains all I/O first so that constantly
 * arriving requests cannot starve the writer, then polls until no reader
 * is left.
 */
void no_coroutine_fn bdrv_graph_wrlock(void)
{
    GLOBAL_STATE_CODE();
    assert(!qatomic_read(&has_writer));
    assert(!qemu_in_coroutine());

    /* Make sure that constantly arriving new I/O doesn't cause starvation */
    bdrv_drain_all_begin_nopoll();

    /*
     * reader_count == 0: any reader that increments its counter after this
     *                    point will observe has_writer == 1 and wait.
     * reader_count >= 1: we don't know whether those readers read has_writer
     *                    as 0 or 1, so we need to wait for them.
     * Wait by allowing other coroutines (and possible readers) to continue.
     */
    do {
        /*
         * has_writer must be 0 while polling, otherwise we get a deadlock if
         * any callback involved during AIO_WAIT_WHILE() tries to acquire the
         * reader lock.
         */
        qatomic_set(&has_writer, 0);
        AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
        qatomic_set(&has_writer, 1);

        /*
         * We want to only check reader_count() after has_writer = 1 is visible
         * to other threads. That way no more readers can sneak in after we've
         * determined reader_count() == 0.
         */
        smp_mb();
    } while (reader_count() >= 1);

    bdrv_drain_all_end();
}
144 | ||
/*
 * Release the exclusive (writer) graph lock taken by bdrv_graph_wrlock()
 * and wake up every reader coroutine that parked itself on reader_queue
 * while the writer was active.
 */
void no_coroutine_fn bdrv_graph_wrunlock(void)
{
    GLOBAL_STATE_CODE();
    assert(qatomic_read(&has_writer));

    WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
        /*
         * No need for memory barriers, this works in pair with
         * the slow path of rdlock() and both take the lock.
         */
        qatomic_store_release(&has_writer, 0);

        /* Wake up all coroutines that are waiting to read the graph */
        qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
    }

    /*
     * Run any BHs that were scheduled during the wrlock section and that
     * callers might expect to have finished (in particular, this is important
     * for bdrv_schedule_unref()).
     *
     * Do this only after restarting coroutines so that nested event loops in
     * BHs don't deadlock if their condition relies on the coroutine making
     * progress.
     */
    aio_bh_poll(qemu_get_aio_context());
}
172 | ||
/*
 * Take the graph rdlock from a coroutine: bump this AioContext's reader
 * counter and, if a writer is (or may be about to become) active, undo the
 * increment and park the coroutine on reader_queue until the writer wakes
 * it up again.
 */
void coroutine_fn bdrv_graph_co_rdlock(void)
{
    BdrvGraphRWlock *bdrv_graph;
    bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;

    for (;;) {
        qatomic_set(&bdrv_graph->reader_count,
                    bdrv_graph->reader_count + 1);
        /* make sure writer sees reader_count before we check has_writer */
        smp_mb();

        /*
         * has_writer == 0: this means writer will read reader_count as >= 1
         * has_writer == 1: we don't know if writer read reader_count == 0
         *                  or > 0, but we need to wait anyways because
         *                  it will write.
         */
        if (!qatomic_read(&has_writer)) {
            break;
        }

        /*
         * Synchronize access with reader_count() in bdrv_graph_wrlock().
         * Case 1:
         * If this critical section gets executed first, reader_count will
         * decrease and the reader will go to sleep.
         * Then the writer will read reader_count that does not take into
         * account this reader, and if there's no other reader it will
         * enter the write section.
         * Case 2:
         * If reader_count() critical section gets executed first,
         * then writer will read reader_count >= 1.
         * It will wait in AIO_WAIT_WHILE(), but once it releases the lock
         * we will enter this critical section and call aio_wait_kick().
         */
        WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
            /*
             * Additional check when we use the above lock to synchronize
             * with bdrv_graph_wrunlock().
             * Case 1:
             * If this gets executed first, has_writer is still 1, so we reduce
             * reader_count and go to sleep.
             * Then the writer will set has_writer to 0 and wake up all readers,
             * us included.
             * Case 2:
             * If bdrv_graph_wrunlock() critical section gets executed first,
             * then it will set has_writer to 0 and wake up all other readers.
             * Then we execute this critical section, and therefore must check
             * again for has_writer, otherwise we sleep without any writer
             * actually running.
             */
            if (!qatomic_read(&has_writer)) {
                return;
            }

            /* slow path where reader sleeps */
            bdrv_graph->reader_count--;
            aio_wait_kick();
            qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
        }
    }
}
235 | ||
/*
 * Drop the graph rdlock from a coroutine: decrement this AioContext's
 * reader counter and, if a writer is polling for readers to drain, kick
 * its AIO_WAIT_WHILE() so it re-reads the updated count.
 */
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
    BdrvGraphRWlock *bdrv_graph;
    bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;

    qatomic_store_release(&bdrv_graph->reader_count,
                          bdrv_graph->reader_count - 1);
    /* make sure writer sees reader_count before we check has_writer */
    smp_mb();

    /*
     * has_writer == 0: the writer is guaranteed to read the decreased
     *                  reader_count.
     * has_writer == 1: we don't know if the writer read reader_count old or
     *                  new. Therefore, kick again so on the next iteration
     *                  the writer will for sure read the updated value.
     */
    if (qatomic_read(&has_writer)) {
        aio_wait_kick();
    }
}
256 | ||
/*
 * "Take" the graph rdlock from main-loop (non-coroutine) code.  No actual
 * locking happens here: the assertions only check the calling context.
 * NOTE(review): presumably main-loop code is already mutually exclusive
 * with the writer (which asserts GLOBAL_STATE_CODE() too), making a real
 * lock unnecessary — confirm against graph-lock.h.
 */
void bdrv_graph_rdlock_main_loop(void)
{
    GLOBAL_STATE_CODE();
    assert(!qemu_in_coroutine());
}
262 | ||
/*
 * Counterpart of bdrv_graph_rdlock_main_loop().  Like it, this performs no
 * real unlocking — only context assertions; the pair exists so main-loop
 * code has balanced lock/unlock calls.
 */
void bdrv_graph_rdunlock_main_loop(void)
{
    GLOBAL_STATE_CODE();
    assert(!qemu_in_coroutine());
}
/*
 * Assert that the current thread may read the block graph: either it is
 * the main thread, or some reader holds the rdlock (reader_count() > 0).
 * Compiled out unless CONFIG_DEBUG_GRAPH_LOCK is set, because
 * reader_count() is expensive.
 */
void assert_bdrv_graph_readable(void)
{
    /* reader_count() is slow due to aio_context_list_lock lock contention */
#ifdef CONFIG_DEBUG_GRAPH_LOCK
    assert(qemu_in_main_thread() || reader_count());
#endif
}
276 | ||
/*
 * Assert that the caller holds the graph wrlock: it must run in the main
 * thread with has_writer set by bdrv_graph_wrlock().
 */
void assert_bdrv_graph_writable(void)
{
    assert(qemu_in_main_thread());
    assert(qatomic_read(&has_writer));
}