]> git.proxmox.com Git - mirror_ovs.git/blame - lib/ovs-rcu.c
odp-execute: Rename 'may_steal' to 'should_steal'.
[mirror_ovs.git] / lib / ovs-rcu.c
CommitLineData
/*
 * Copyright (c) 2014, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <config.h>
9dede5cf 18#include <errno.h>
0f2ea848 19#include "ovs-rcu.h"
d2843eba 20#include "fatal-signal.h"
0f2ea848 21#include "guarded-list.h"
9a3cf0ac 22#include "latch.h"
b19bab5b 23#include "openvswitch/list.h"
0f2ea848 24#include "ovs-thread.h"
fd016ae3 25#include "openvswitch/poll-loop.h"
0f2ea848 26#include "seq.h"
214694ad 27#include "timeval.h"
ee89ea7b 28#include "util.h"
e6211adc 29#include "openvswitch/vlog.h"
/* Log module for RCU diagnostics (e.g. the stalled-thread warnings emitted by
 * ovsrcu_synchronize()). */
VLOG_DEFINE_THIS_MODULE(ovs_rcu);
/* A single deferred callback: 'function' is invoked with 'aux' after a grace
 * period has elapsed. */
struct ovsrcu_cb {
    void (*function)(void *aux);
    void *aux;
};
37
/* A batch of deferred callbacks.  Each thread accumulates callbacks into one
 * of these and flushes it to the global 'flushed_cbsets' list when the batch
 * fills up or when the thread quiesces. */
struct ovsrcu_cbset {
    struct ovs_list list_node;  /* In 'flushed_cbsets' once flushed. */
    struct ovsrcu_cb cbs[16];   /* Fixed-size batch of callbacks. */
    int n_cbs;                  /* Number of entries of 'cbs' in use. */
};
43
/* Per-thread RCU state.  A thread has one of these whenever it is *not*
 * quiescent; it is created on demand and destroyed when the thread starts a
 * quiescent period or exits. */
struct ovsrcu_perthread {
    struct ovs_list list_node;  /* In global list. */

    struct ovs_mutex mutex;
    uint64_t seqno;             /* Last value of 'global_seqno' observed by
                                 * this thread. */
    struct ovsrcu_cbset *cbset; /* Callbacks postponed by this thread, or
                                 * NULL. */
    char name[16];              /* This thread's name. */
};
52
/* Changed every time any thread quiesces; ovsrcu_synchronize() watches it to
 * detect grace periods. */
static struct seq *global_seqno;

/* Thread-specific-data key holding each thread's "struct ovsrcu_perthread". */
static pthread_key_t perthread_key;

/* All non-quiescent threads' "struct ovsrcu_perthread"s, guarded by
 * 'ovsrcu_threads_mutex'. */
static struct ovs_list ovsrcu_threads;
static struct ovs_mutex ovsrcu_threads_mutex;

/* Callback sets flushed out of individual threads, awaiting a grace period.
 * 'flushed_cbsets_seq' wakes up the postpone thread when sets are added. */
static struct guarded_list flushed_cbsets;
static struct seq *flushed_cbsets_seq;

/* Used by ovsrcu_exit() to stop the postpone thread and wait for it to
 * finish. */
static struct latch postpone_exit;
static struct ovs_barrier postpone_barrier;

static void ovsrcu_init_module(void);
static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
static void ovsrcu_unregister__(struct ovsrcu_perthread *);
static bool ovsrcu_call_postponed(void);
static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED);
0f2ea848
BP
71
72static struct ovsrcu_perthread *
73ovsrcu_perthread_get(void)
74{
75 struct ovsrcu_perthread *perthread;
76
e10022d2 77 ovsrcu_init_module();
0f2ea848
BP
78
79 perthread = pthread_getspecific(perthread_key);
80 if (!perthread) {
0d593ee4
BP
81 const char *name = get_subprogram_name();
82
0f2ea848
BP
83 perthread = xmalloc(sizeof *perthread);
84 ovs_mutex_init(&perthread->mutex);
85 perthread->seqno = seq_read(global_seqno);
86 perthread->cbset = NULL;
0d593ee4 87 ovs_strlcpy(perthread->name, name[0] ? name : "main",
214694ad 88 sizeof perthread->name);
0f2ea848
BP
89
90 ovs_mutex_lock(&ovsrcu_threads_mutex);
417e7e66 91 ovs_list_push_back(&ovsrcu_threads, &perthread->list_node);
0f2ea848
BP
92 ovs_mutex_unlock(&ovsrcu_threads_mutex);
93
94 pthread_setspecific(perthread_key, perthread);
95 }
96 return perthread;
97}
98
/* Ends a quiescent state.  See "Details" near the top of ovs-rcu.h.
 *
 * Quiescent states do not stack or nest: a single call always ends the
 * quiescent state, no matter how many times ovsrcu_quiesce_start() was called
 * in a row. */
void
ovsrcu_quiesce_end(void)
{
    /* Creating the per-thread state is what marks this thread active. */
    ovsrcu_perthread_get();
}
109
110static void
111ovsrcu_quiesced(void)
112{
113 if (single_threaded()) {
114 ovsrcu_call_postponed();
115 } else {
116 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
117 if (ovsthread_once_start(&once)) {
9a3cf0ac
BP
118 latch_init(&postpone_exit);
119 ovs_barrier_init(&postpone_barrier, 2);
8ba0a522 120 ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL);
0f2ea848
BP
121 ovsthread_once_done(&once);
122 }
123 }
124}
125
126/* Indicates the beginning of a quiescent state. See "Details" near the top of
127 * ovs-rcu.h. */
128void
129ovsrcu_quiesce_start(void)
130{
131 struct ovsrcu_perthread *perthread;
132
e10022d2 133 ovsrcu_init_module();
0f2ea848
BP
134 perthread = pthread_getspecific(perthread_key);
135 if (perthread) {
136 pthread_setspecific(perthread_key, NULL);
137 ovsrcu_unregister__(perthread);
138 }
139
140 ovsrcu_quiesced();
141}
142
143/* Indicates a momentary quiescent state. See "Details" near the top of
6969766b
JR
144 * ovs-rcu.h.
145 *
146 * Provides a full memory barrier via seq_change().
147 */
0f2ea848
BP
148void
149ovsrcu_quiesce(void)
150{
9c7e020f
AW
151 struct ovsrcu_perthread *perthread;
152
153 perthread = ovsrcu_perthread_get();
154 perthread->seqno = seq_read(global_seqno);
155 if (perthread->cbset) {
156 ovsrcu_flush_cbset(perthread);
157 }
0f2ea848
BP
158 seq_change(global_seqno);
159
160 ovsrcu_quiesced();
161}
162
/* Attempts a momentary quiescent state, like ovsrcu_quiesce(), but without
 * blocking: the global seq mutex is only tried, never waited for.
 *
 * Returns 0 on success, EBUSY if the seq lock could not be acquired
 * immediately.  Must not be called in a single-threaded process. */
int
ovsrcu_try_quiesce(void)
{
    struct ovsrcu_perthread *perthread;
    int ret = EBUSY;

    ovs_assert(!single_threaded());
    perthread = ovsrcu_perthread_get();
    /* A zero return from seq_try_lock() means the lock was taken, so the
     * *_protected seq operations below are safe. */
    if (!seq_try_lock()) {
        perthread->seqno = seq_read_protected(global_seqno);
        if (perthread->cbset) {
            /* 'true': we hold the seq lock, so flush in protected mode. */
            ovsrcu_flush_cbset__(perthread, true);
        }
        seq_change_protected(global_seqno);
        seq_unlock();
        ovsrcu_quiesced();
        ret = 0;
    }
    return ret;
}
183
3308c696
BP
184bool
185ovsrcu_is_quiescent(void)
186{
e10022d2 187 ovsrcu_init_module();
3308c696
BP
188 return pthread_getspecific(perthread_key) == NULL;
189}
190
/* Blocks until all registered threads have quiesced at least once after the
 * current point, i.e. waits for a full grace period.  Logs a warning
 * (with exponential backoff) naming any thread that stalls the wait. */
void
ovsrcu_synchronize(void)
{
    unsigned int warning_threshold = 1000; /* ms before first warning. */
    uint64_t target_seqno;
    long long int start;

    if (single_threaded()) {
        return;
    }

    target_seqno = seq_read(global_seqno);
    /* This thread must itself be quiescent while it waits for the others,
     * otherwise it would stall the grace period. */
    ovsrcu_quiesce_start();
    start = time_msec();

    for (;;) {
        uint64_t cur_seqno = seq_read(global_seqno);
        struct ovsrcu_perthread *perthread;
        char stalled_thread[16];
        unsigned int elapsed;
        bool done = true;

        /* A thread has passed the grace period once its observed seqno
         * exceeds 'target_seqno'. */
        ovs_mutex_lock(&ovsrcu_threads_mutex);
        LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) {
            if (perthread->seqno <= target_seqno) {
                ovs_strlcpy_arrays(stalled_thread, perthread->name);
                done = false;
                break;
            }
        }
        ovs_mutex_unlock(&ovsrcu_threads_mutex);

        if (done) {
            break;
        }

        elapsed = time_msec() - start;
        if (elapsed >= warning_threshold) {
            VLOG_WARN("blocked %u ms waiting for %s to quiesce",
                      elapsed, stalled_thread);
            warning_threshold *= 2;  /* Exponential backoff on warnings. */
        }
        poll_timer_wait_until(start + warning_threshold);

        /* Sleep until 'global_seqno' changes again (or the warning timer
         * fires). */
        seq_wait(global_seqno, cur_seqno);
        poll_block();
    }
    ovsrcu_quiesce_end();
}
240
/* Waits until as many postponed callbacks as possible have executed.
 *
 * As a side effect, stops the background thread that calls the callbacks and
 * prevents it from being restarted.  This means that this function should only
 * be called soon before a process exits, as a mechanism for releasing memory
 * to make memory leaks easier to detect, since any further postponed callbacks
 * won't actually get called.
 *
 * This function can only wait for callbacks registered by the current thread
 * and the background thread that calls the callbacks.  Thus, it will be most
 * effective if other threads have already exited. */
void
ovsrcu_exit(void)
{
    /* Stop the postpone thread and wait for it to exit.  Otherwise, there's no
     * way to wait for that thread to finish calling callbacks itself. */
    if (!single_threaded()) {
        ovsrcu_quiesced();      /* Ensure that the postpone thread exists. */
        latch_set(&postpone_exit);
        /* The postpone thread blocks on this same barrier just before it
         * returns, so this rendezvous guarantees it has finished. */
        ovs_barrier_block(&postpone_barrier);
    }

    /* Repeatedly:
     *
     *    - Wait for a grace period.  One important side effect is to push the
     *      running thread's cbset into 'flushed_cbsets' so that the next call
     *      has something to call.
     *
     *    - Call all the callbacks in 'flushed_cbsets'.  If there aren't any,
     *      we're done, otherwise the callbacks themselves might have requested
     *      more deferred callbacks so we go around again.
     *
     * We limit the number of iterations just in case some bug causes an
     * infinite loop.  This function is just for making memory leaks easier to
     * spot so there's no point in breaking things on that basis. */
    for (int i = 0; i < 8; i++) {
        ovsrcu_synchronize();
        if (!ovsrcu_call_postponed()) {
            break;
        }
    }
}
283
0f2ea848
BP
284/* Registers 'function' to be called, passing 'aux' as argument, after the
285 * next grace period.
286 *
2541d759
JR
287 * The call is guaranteed to happen after the next time all participating
288 * threads have quiesced at least once, but there is no quarantee that all
289 * registered functions are called as early as possible, or that the functions
290 * registered by different threads would be called in the order the
291 * registrations took place. In particular, even if two threads provably
292 * register a function each in a specific order, the functions may still be
293 * called in the opposite order, depending on the timing of when the threads
294 * call ovsrcu_quiesce(), how many functions they postpone, and when the
295 * ovs-rcu thread happens to grab the functions to be called.
296 *
297 * All functions registered by a single thread are guaranteed to execute in the
298 * registering order, however.
299 *
0f2ea848
BP
300 * This function is more conveniently called through the ovsrcu_postpone()
301 * macro, which provides a type-safe way to allow 'function''s parameter to be
302 * any pointer type. */
303void
304ovsrcu_postpone__(void (*function)(void *aux), void *aux)
305{
306 struct ovsrcu_perthread *perthread = ovsrcu_perthread_get();
307 struct ovsrcu_cbset *cbset;
308 struct ovsrcu_cb *cb;
309
310 cbset = perthread->cbset;
311 if (!cbset) {
312 cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset);
313 cbset->n_cbs = 0;
314 }
315
316 cb = &cbset->cbs[cbset->n_cbs++];
317 cb->function = function;
318 cb->aux = aux;
319
320 if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) {
321 ovsrcu_flush_cbset(perthread);
322 }
323}
324
/* Runs all currently flushed postponed callbacks, after first waiting for a
 * grace period so that no reader can still hold the protected pointers.
 *
 * Returns false (without synchronizing) if there was nothing to call,
 * true if at least one callback set was processed. */
static bool
ovsrcu_call_postponed(void)
{
    struct ovsrcu_cbset *cbset;
    struct ovs_list cbsets;

    /* Take ownership of everything flushed so far before synchronizing, so
     * sets flushed *during* the grace period wait for the next round. */
    guarded_list_pop_all(&flushed_cbsets, &cbsets);
    if (ovs_list_is_empty(&cbsets)) {
        return false;
    }

    ovsrcu_synchronize();

    LIST_FOR_EACH_POP (cbset, list_node, &cbsets) {
        struct ovsrcu_cb *cb;

        for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) {
            cb->function(cb->aux);
        }
        free(cbset);
    }

    return true;
}
349
/* Main loop of the background "urcu" thread: repeatedly runs flushed
 * postponed callbacks, sleeping until new sets are flushed or until
 * ovsrcu_exit() requests shutdown via 'postpone_exit'. */
static void *
ovsrcu_postpone_thread(void *arg OVS_UNUSED)
{
    pthread_detach(pthread_self());

    while (!latch_is_set(&postpone_exit)) {
        /* Snapshot the seq *before* checking for work, so a flush between the
         * check and seq_wait() still wakes us. */
        uint64_t seqno = seq_read(flushed_cbsets_seq);
        if (!ovsrcu_call_postponed()) {
            seq_wait(flushed_cbsets_seq, seqno);
            latch_wait(&postpone_exit);
            poll_block();
        }
    }

    /* Rendezvous with ovsrcu_exit() so it knows we are done. */
    ovs_barrier_block(&postpone_barrier);
    return NULL;
}
367
/* Moves 'perthread''s pending callback set (if any) onto the global
 * 'flushed_cbsets' list and wakes the postpone thread.
 *
 * 'protected' selects seq_change_protected() when the caller already holds
 * the seq lock (see ovsrcu_try_quiesce()), seq_change() otherwise. */
static void
ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected)
{
    struct ovsrcu_cbset *cbset = perthread->cbset;

    if (cbset) {
        guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX);
        perthread->cbset = NULL;

        if (protected) {
            seq_change_protected(flushed_cbsets_seq);
        } else {
            seq_change(flushed_cbsets_seq);
        }
    }
}
384
/* Convenience wrapper: flushes 'perthread''s pending callbacks in the common
 * case where the caller does not hold the seq lock. */
static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
    ovsrcu_flush_cbset__(perthread, false);
}
390
/* Tears down 'perthread': flushes its pending callbacks, removes it from the
 * global thread list, frees it, and bumps 'global_seqno' so that waiters in
 * ovsrcu_synchronize() re-examine the thread list. */
static void
ovsrcu_unregister__(struct ovsrcu_perthread *perthread)
{
    if (perthread->cbset) {
        ovsrcu_flush_cbset(perthread);
    }

    ovs_mutex_lock(&ovsrcu_threads_mutex);
    ovs_list_remove(&perthread->list_node);
    ovs_mutex_unlock(&ovsrcu_threads_mutex);

    ovs_mutex_destroy(&perthread->mutex);
    free(perthread);

    /* Wake up any thread blocked in ovsrcu_synchronize(): this thread no
     * longer counts toward the grace period. */
    seq_change(global_seqno);
}
407
/* Destructor for 'perthread_key': runs when a thread exits while still
 * registered (non-quiescent), releasing its per-thread RCU state. */
static void
ovsrcu_thread_exit_cb(void *perthread)
{
    ovsrcu_unregister__(perthread);
}
413
d2843eba
GS
414/* Cancels the callback to ovsrcu_thread_exit_cb().
415 *
416 * Cancelling the call to the destructor during the main thread exit
417 * is needed while using pthreads-win32 library in Windows. It has been
418 * observed that in pthreads-win32, a call to the destructor during
419 * main thread exit causes undefined behavior. */
420static void
421ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED)
422{
423 pthread_setspecific(perthread_key, NULL);
424}
425
/* One-time initialization of the module's global state.  Safe (and cheap) to
 * call repeatedly from any thread; only the first caller does the work. */
static void
ovsrcu_init_module(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    if (ovsthread_once_start(&once)) {
        global_seqno = seq_create();
        /* Register the per-thread destructor before any thread can register
         * itself, so exiting threads are always cleaned up. */
        xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb);
        fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true);
        ovs_list_init(&ovsrcu_threads);
        ovs_mutex_init(&ovsrcu_threads_mutex);

        guarded_list_init(&flushed_cbsets);
        flushed_cbsets_seq = seq_create();

        ovsthread_once_done(&once);
    }
}