]> git.proxmox.com Git - mirror_ovs.git/blame - lib/ovs-rcu.c
ovsdb-idl: Fix iteration over tracked rows with no actual data.
[mirror_ovs.git] / lib / ovs-rcu.c
CommitLineData
/*
 * Copyright (c) 2014, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <config.h>
9dede5cf 18#include <errno.h>
0f2ea848 19#include "ovs-rcu.h"
d2843eba 20#include "fatal-signal.h"
0f2ea848 21#include "guarded-list.h"
9a3cf0ac 22#include "latch.h"
b19bab5b 23#include "openvswitch/list.h"
0f2ea848 24#include "ovs-thread.h"
fd016ae3 25#include "openvswitch/poll-loop.h"
0f2ea848 26#include "seq.h"
214694ad 27#include "timeval.h"
ee89ea7b 28#include "util.h"
e6211adc 29#include "openvswitch/vlog.h"
214694ad
BP
30
31VLOG_DEFINE_THIS_MODULE(ovs_rcu);
0f2ea848 32
5042815d
IM
/* Initial number of callback slots allocated in a new callback set; the
 * array doubles (via x2nrealloc) when it fills up. */
#define MIN_CBS 16

/* One deferred callback: 'function' is invoked with 'aux' after a grace
 * period has elapsed. */
struct ovsrcu_cb {
    void (*function)(void *aux);
    void *aux;
};

/* A growable batch of callbacks postponed by one thread between quiescent
 * states.  Flushed as a unit onto 'flushed_cbsets'. */
struct ovsrcu_cbset {
    struct ovs_list list_node;  /* In 'flushed_cbsets' once flushed. */
    struct ovsrcu_cb *cbs;      /* Array of 'n_allocated' slots. */
    size_t n_allocated;
    int n_cbs;                  /* Number of slots in use. */
};

/* Per-thread RCU state.  A thread owns one of these whenever it is NOT
 * quiescent; it is torn down on quiescence start or thread exit. */
struct ovsrcu_perthread {
    struct ovs_list list_node;  /* In global list. */

    struct ovs_mutex mutex;
    uint64_t seqno;             /* Last 'global_seqno' value observed. */
    struct ovsrcu_cbset *cbset; /* Callbacks postponed by this thread. */
    char name[16];              /* This thread's name. */
};

/* Changes each time any thread passes through a quiescent state. */
static struct seq *global_seqno;

static pthread_key_t perthread_key;
static struct ovs_list ovsrcu_threads;  /* Of "struct ovsrcu_perthread"s. */
static struct ovs_mutex ovsrcu_threads_mutex;

/* Callback sets flushed by threads, waiting to run after a grace period. */
static struct guarded_list flushed_cbsets;
static struct seq *flushed_cbsets_seq;

/* Coordinate orderly shutdown of the background "urcu" thread. */
static struct latch postpone_exit;
static struct ovs_barrier postpone_barrier;

static void ovsrcu_init_module(void);
static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
static void ovsrcu_unregister__(struct ovsrcu_perthread *);
static bool ovsrcu_call_postponed(void);
static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED);
0f2ea848
BP
74
/* Returns this thread's ovsrcu_perthread, creating and registering it first
 * if the thread does not already have one.  Creating it implicitly ends any
 * quiescent state, since the thread becomes visible to
 * ovsrcu_synchronize(). */
static struct ovsrcu_perthread *
ovsrcu_perthread_get(void)
{
    struct ovsrcu_perthread *perthread;

    ovsrcu_init_module();

    perthread = pthread_getspecific(perthread_key);
    if (!perthread) {
        const char *name = get_subprogram_name();

        perthread = xmalloc(sizeof *perthread);
        ovs_mutex_init(&perthread->mutex);
        /* Record the current global seqno so synchronizers know this thread
         * has not yet quiesced past this point. */
        perthread->seqno = seq_read(global_seqno);
        perthread->cbset = NULL;
        ovs_strlcpy(perthread->name, name[0] ? name : "main",
                    sizeof perthread->name);

        ovs_mutex_lock(&ovsrcu_threads_mutex);
        ovs_list_push_back(&ovsrcu_threads, &perthread->list_node);
        ovs_mutex_unlock(&ovsrcu_threads_mutex);

        pthread_setspecific(perthread_key, perthread);
    }
    return perthread;
}
101
/* Indicates the end of a quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Quiescent states don't stack or nest, so this always ends a quiescent state
 * even if ovsrcu_quiesce_start() was called multiple times in a row. */
void
ovsrcu_quiesce_end(void)
{
    /* Re-registers the thread by (re)creating its per-thread state. */
    ovsrcu_perthread_get();
}
112
/* Common post-quiescence work: in a single-threaded process, run postponed
 * callbacks inline; otherwise make sure the background "urcu" thread that
 * runs them has been started (exactly once per process). */
static void
ovsrcu_quiesced(void)
{
    if (single_threaded()) {
        ovsrcu_call_postponed();
    } else {
        static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
        if (ovsthread_once_start(&once)) {
            /* Barrier has 2 participants: this process (in ovsrcu_exit())
             * and the postpone thread. */
            latch_init(&postpone_exit);
            ovs_barrier_init(&postpone_barrier, 2);
            ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL);
            ovsthread_once_done(&once);
        }
    }
}
128
/* Indicates the beginning of a quiescent state.  See "Details" near the top
 * of ovs-rcu.h. */
void
ovsrcu_quiesce_start(void)
{
    struct ovsrcu_perthread *perthread;

    ovsrcu_init_module();
    perthread = pthread_getspecific(perthread_key);
    if (perthread) {
        /* Unregister this thread: flush its pending callbacks and remove it
         * from the global list so synchronizers stop waiting on it. */
        pthread_setspecific(perthread_key, NULL);
        ovsrcu_unregister__(perthread);
    }

    ovsrcu_quiesced();
}
145
/* Indicates a momentary quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Provides a full memory barrier via seq_change().
 */
void
ovsrcu_quiesce(void)
{
    struct ovsrcu_perthread *perthread;

    perthread = ovsrcu_perthread_get();
    /* Advancing our seqno tells ovsrcu_synchronize() we have quiesced. */
    perthread->seqno = seq_read(global_seqno);
    if (perthread->cbset) {
        ovsrcu_flush_cbset(perthread);
    }
    seq_change(global_seqno);

    ovsrcu_quiesced();
}
165
/* Attempts a momentary quiescent state without blocking on the global seq
 * mutex.  Returns 0 on success, EBUSY if the mutex was contended and no
 * quiescent state was recorded.  Must not be called single-threaded. */
int
ovsrcu_try_quiesce(void)
{
    struct ovsrcu_perthread *perthread;
    int ret = EBUSY;

    ovs_assert(!single_threaded());
    perthread = ovsrcu_perthread_get();
    if (!seq_try_lock()) {
        /* seq_try_lock() returned 0, so the seq mutex is held here; use the
         * "_protected" seq variants until seq_unlock(). */
        perthread->seqno = seq_read_protected(global_seqno);
        if (perthread->cbset) {
            ovsrcu_flush_cbset__(perthread, true);
        }
        seq_change_protected(global_seqno);
        seq_unlock();
        ovsrcu_quiesced();
        ret = 0;
    }
    return ret;
}
186
3308c696
BP
187bool
188ovsrcu_is_quiescent(void)
189{
e10022d2 190 ovsrcu_init_module();
3308c696
BP
191 return pthread_getspecific(perthread_key) == NULL;
192}
193
/* Blocks until every registered thread has quiesced at least once since the
 * time of the call, i.e. until a full grace period has elapsed.  The calling
 * thread itself is quiescent for the duration of the wait.  Logs a warning,
 * with exponential backoff, naming a stalled thread if the wait drags on. */
void
ovsrcu_synchronize(void)
{
    unsigned int warning_threshold = 1000; /* ms before first warning. */
    uint64_t target_seqno;
    long long int start;

    if (single_threaded()) {
        return;
    }

    target_seqno = seq_read(global_seqno);
    ovsrcu_quiesce_start();
    start = time_msec();

    for (;;) {
        uint64_t cur_seqno = seq_read(global_seqno);
        struct ovsrcu_perthread *perthread;
        char stalled_thread[16];
        unsigned int elapsed;
        bool done = true;

        ovs_mutex_lock(&ovsrcu_threads_mutex);
        LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) {
            if (perthread->seqno <= target_seqno) {
                /* This thread has not quiesced since 'target_seqno'. */
                ovs_strlcpy_arrays(stalled_thread, perthread->name);
                done = false;
                break;
            }
        }
        ovs_mutex_unlock(&ovsrcu_threads_mutex);

        if (done) {
            break;
        }

        elapsed = time_msec() - start;
        if (elapsed >= warning_threshold) {
            VLOG_WARN("blocked %u ms waiting for %s to quiesce",
                      elapsed, stalled_thread);
            warning_threshold *= 2; /* Back off so we don't spam the log. */
        }
        poll_timer_wait_until(start + warning_threshold);

        /* Sleep until the global seqno changes again (or the warning
         * timer fires). */
        seq_wait(global_seqno, cur_seqno);
        poll_block();
    }
    ovsrcu_quiesce_end();
}
243
/* Waits until as many postponed callbacks as possible have executed.
 *
 * As a side effect, stops the background thread that calls the callbacks and
 * prevents it from being restarted.  This means that this function should only
 * be called soon before a process exits, as a mechanism for releasing memory
 * to make memory leaks easier to detect, since any further postponed callbacks
 * won't actually get called.
 *
 * This function can only wait for callbacks registered by the current thread
 * and the background thread that calls the callbacks.  Thus, it will be most
 * effective if other threads have already exited. */
void
ovsrcu_exit(void)
{
    /* Stop the postpone thread and wait for it to exit.  Otherwise, there's
     * no way to wait for that thread to finish calling callbacks itself. */
    if (!single_threaded()) {
        ovsrcu_quiesced();      /* Ensure that the postpone thread exists. */
        latch_set(&postpone_exit);
        ovs_barrier_block(&postpone_barrier);
    }

    /* Repeatedly:
     *
     *    - Wait for a grace period.  One important side effect is to push
     *      the running thread's cbset into 'flushed_cbsets' so that the next
     *      call has something to call.
     *
     *    - Call all the callbacks in 'flushed_cbsets'.  If there aren't any,
     *      we're done, otherwise the callbacks themselves might have
     *      requested more deferred callbacks so we go around again.
     *
     * We limit the number of iterations just in case some bug causes an
     * infinite loop.  This function is just for making memory leaks easier
     * to spot so there's no point in breaking things on that basis. */
    for (int i = 0; i < 8; i++) {
        ovsrcu_synchronize();
        if (!ovsrcu_call_postponed()) {
            break;
        }
    }
}
286
0f2ea848
BP
287/* Registers 'function' to be called, passing 'aux' as argument, after the
288 * next grace period.
289 *
2541d759
JR
290 * The call is guaranteed to happen after the next time all participating
291 * threads have quiesced at least once, but there is no quarantee that all
292 * registered functions are called as early as possible, or that the functions
293 * registered by different threads would be called in the order the
294 * registrations took place. In particular, even if two threads provably
295 * register a function each in a specific order, the functions may still be
296 * called in the opposite order, depending on the timing of when the threads
297 * call ovsrcu_quiesce(), how many functions they postpone, and when the
298 * ovs-rcu thread happens to grab the functions to be called.
299 *
300 * All functions registered by a single thread are guaranteed to execute in the
301 * registering order, however.
302 *
0f2ea848
BP
303 * This function is more conveniently called through the ovsrcu_postpone()
304 * macro, which provides a type-safe way to allow 'function''s parameter to be
305 * any pointer type. */
306void
307ovsrcu_postpone__(void (*function)(void *aux), void *aux)
308{
309 struct ovsrcu_perthread *perthread = ovsrcu_perthread_get();
310 struct ovsrcu_cbset *cbset;
311 struct ovsrcu_cb *cb;
312
313 cbset = perthread->cbset;
314 if (!cbset) {
315 cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset);
5042815d
IM
316 cbset->cbs = xmalloc(MIN_CBS * sizeof *cbset->cbs);
317 cbset->n_allocated = MIN_CBS;
0f2ea848
BP
318 cbset->n_cbs = 0;
319 }
320
5042815d
IM
321 if (cbset->n_cbs == cbset->n_allocated) {
322 cbset->cbs = x2nrealloc(cbset->cbs, &cbset->n_allocated,
323 sizeof *cbset->cbs);
324 }
325
0f2ea848
BP
326 cb = &cbset->cbs[cbset->n_cbs++];
327 cb->function = function;
328 cb->aux = aux;
0f2ea848
BP
329}
330
/* Pops all flushed callback sets, waits for one grace period, then runs and
 * frees every callback.  Returns false if there was nothing to do, true
 * otherwise. */
static bool
ovsrcu_call_postponed(void)
{
    struct ovsrcu_cbset *cbset;
    struct ovs_list cbsets;

    guarded_list_pop_all(&flushed_cbsets, &cbsets);
    if (ovs_list_is_empty(&cbsets)) {
        return false;
    }

    /* Make sure every reader has quiesced since the callbacks were
     * postponed before touching the deferred objects. */
    ovsrcu_synchronize();

    LIST_FOR_EACH_POP (cbset, list_node, &cbsets) {
        struct ovsrcu_cb *cb;

        for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) {
            cb->function(cb->aux);
        }
        free(cbset->cbs);
        free(cbset);
    }

    return true;
}
356
/* Main loop of the background "urcu" thread: repeatedly run postponed
 * callbacks, sleeping until new callback sets are flushed or until exit is
 * requested via 'postpone_exit'. */
static void *
ovsrcu_postpone_thread(void *arg OVS_UNUSED)
{
    pthread_detach(pthread_self());

    while (!latch_is_set(&postpone_exit)) {
        uint64_t seqno = seq_read(flushed_cbsets_seq);
        if (!ovsrcu_call_postponed()) {
            /* Nothing to run: wait for more flushed cbsets or an exit
             * request. */
            seq_wait(flushed_cbsets_seq, seqno);
            latch_wait(&postpone_exit);
            poll_block();
        }
    }

    /* Rendezvous with ovsrcu_exit(). */
    ovs_barrier_block(&postpone_barrier);
    return NULL;
}
374
/* Moves 'perthread''s accumulated callback set onto the global
 * 'flushed_cbsets' list and wakes the thread that runs them.  'protected'
 * must be true iff the caller already holds the seq mutex, selecting the
 * "_protected" seq_change() variant. */
static void
ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected)
{
    struct ovsrcu_cbset *cbset = perthread->cbset;

    if (cbset) {
        guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX);
        perthread->cbset = NULL;

        if (protected) {
            seq_change_protected(flushed_cbsets_seq);
        } else {
            seq_change(flushed_cbsets_seq);
        }
    }
}
391
/* Flushes 'perthread''s callback set; the caller must not hold the seq
 * mutex. */
static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
    ovsrcu_flush_cbset__(perthread, false);
}
397
/* Tears down 'perthread': flushes its pending callbacks, removes it from the
 * global thread list, and frees it.  Bumps 'global_seqno' so waiters in
 * ovsrcu_synchronize() re-examine the thread list. */
static void
ovsrcu_unregister__(struct ovsrcu_perthread *perthread)
{
    if (perthread->cbset) {
        ovsrcu_flush_cbset(perthread);
    }

    ovs_mutex_lock(&ovsrcu_threads_mutex);
    ovs_list_remove(&perthread->list_node);
    ovs_mutex_unlock(&ovsrcu_threads_mutex);

    ovs_mutex_destroy(&perthread->mutex);
    free(perthread);

    seq_change(global_seqno);
}
414
/* pthread TSD destructor: unregisters an exiting thread's RCU state. */
static void
ovsrcu_thread_exit_cb(void *perthread)
{
    ovsrcu_unregister__(perthread);
}
420
/* Cancels the callback to ovsrcu_thread_exit_cb().
 *
 * Cancelling the call to the destructor during the main thread exit
 * is needed while using pthreads-win32 library in Windows.  It has been
 * observed that in pthreads-win32, a call to the destructor during
 * main thread exit causes undefined behavior. */
static void
ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED)
{
    /* Clearing the TSD value prevents the key destructor from running. */
    pthread_setspecific(perthread_key, NULL);
}
432
/* One-time initialization of the module's global state.  Safe to call from
 * any thread; only the first caller does the work. */
static void
ovsrcu_init_module(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    if (ovsthread_once_start(&once)) {
        global_seqno = seq_create();
        xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb);
        /* pthreads-win32 workaround: cancel the TSD destructor on exit. */
        fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true);
        ovs_list_init(&ovsrcu_threads);
        ovs_mutex_init(&ovsrcu_threads_mutex);

        guarded_list_init(&flushed_cbsets);
        flushed_cbsets_seq = seq_create();

        ovsthread_once_done(&once);
    }
}