/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include <errno.h>
#include "ovs-rcu.h"
#include "fatal-signal.h"
#include "guarded-list.h"
#include "openvswitch/list.h"
#include "ovs-thread.h"
#include "poll-loop.h"
#include "seq.h"
#include "timeval.h"
#include "util.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(ovs_rcu);

struct ovsrcu_cb {
    void (*function)(void *aux);
    void *aux;
};

struct ovsrcu_cbset {
    struct ovs_list list_node;
    struct ovsrcu_cb cbs[16];
    int n_cbs;
};

struct ovsrcu_perthread {
    struct ovs_list list_node;  /* In global list. */

    struct ovs_mutex mutex;
    uint64_t seqno;
    struct ovsrcu_cbset *cbset;
    char name[16];              /* This thread's name. */
};

static struct seq *global_seqno;

static pthread_key_t perthread_key;
static struct ovs_list ovsrcu_threads;
static struct ovs_mutex ovsrcu_threads_mutex;

static struct guarded_list flushed_cbsets;
static struct seq *flushed_cbsets_seq;

static void ovsrcu_init_module(void);
static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
static void ovsrcu_unregister__(struct ovsrcu_perthread *);
static bool ovsrcu_call_postponed(void);
static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED);

static struct ovsrcu_perthread *
ovsrcu_perthread_get(void)
{
    struct ovsrcu_perthread *perthread;

    ovsrcu_init_module();

    perthread = pthread_getspecific(perthread_key);
    if (!perthread) {
        const char *name = get_subprogram_name();

        perthread = xmalloc(sizeof *perthread);
        ovs_mutex_init(&perthread->mutex);
        perthread->seqno = seq_read(global_seqno);
        perthread->cbset = NULL;
        ovs_strlcpy(perthread->name, name[0] ? name : "main",
                    sizeof perthread->name);

        ovs_mutex_lock(&ovsrcu_threads_mutex);
        ovs_list_push_back(&ovsrcu_threads, &perthread->list_node);
        ovs_mutex_unlock(&ovsrcu_threads_mutex);

        pthread_setspecific(perthread_key, perthread);
    }
    return perthread;
}

/* Indicates the end of a quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Quiescent states don't stack or nest, so this always ends a quiescent state
 * even if ovsrcu_quiesce_start() was called multiple times in a row. */
void
ovsrcu_quiesce_end(void)
{
    ovsrcu_perthread_get();
}
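
/* Illustrative sketch, not code from this file: a thread that is about to
 * block for a long time outside of any RCU-protected data can bracket the
 * blocking call with a quiescent period so that it does not hold up grace
 * periods (see ovs-rcu.h for details).  "wait_for_external_event" is a
 * hypothetical placeholder:
 *
 *     ovsrcu_quiesce_start();
 *     wait_for_external_event();
 *     ovsrcu_quiesce_end();
 */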

static void
ovsrcu_quiesced(void)
{
    if (single_threaded()) {
        ovsrcu_call_postponed();
    } else {
        static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
        if (ovsthread_once_start(&once)) {
            ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL);
            ovsthread_once_done(&once);
        }
    }
}

/* Indicates the beginning of a quiescent state.  See "Details" near the top
 * of ovs-rcu.h. */
void
ovsrcu_quiesce_start(void)
{
    struct ovsrcu_perthread *perthread;

    ovsrcu_init_module();
    perthread = pthread_getspecific(perthread_key);
    if (perthread) {
        pthread_setspecific(perthread_key, NULL);
        ovsrcu_unregister__(perthread);
    }

    ovsrcu_quiesced();
}

/* Indicates a momentary quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Provides a full memory barrier via seq_change().
 */
void
ovsrcu_quiesce(void)
{
    struct ovsrcu_perthread *perthread;

    perthread = ovsrcu_perthread_get();
    perthread->seqno = seq_read(global_seqno);
    if (perthread->cbset) {
        ovsrcu_flush_cbset(perthread);
    }
    seq_change(global_seqno);

    ovsrcu_quiesced();
}

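/* Illustrative sketch, not code from this file: a long-running worker thread
 * typically quiesces between iterations of its main loop, which lets grace
 * periods advance and flushes this thread's postponed callbacks.
 * "do_unit_of_work" is a hypothetical placeholder:
 *
 *     for (;;) {
 *         do_unit_of_work();
 *         ovsrcu_quiesce();
 *     }
 */
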
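/* Attempts a momentary quiescent state without blocking on the global seq
 * mutex: if the lock is immediately available, flushes this thread's
 * postponed callbacks, publishes a new global seqno, and returns 0;
 * otherwise returns EBUSY without quiescing.  Must not be called from a
 * single-threaded process. */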
int
ovsrcu_try_quiesce(void)
{
    struct ovsrcu_perthread *perthread;
    int ret = EBUSY;

    ovs_assert(!single_threaded());
    perthread = ovsrcu_perthread_get();
    if (!seq_try_lock()) {
        perthread->seqno = seq_read_protected(global_seqno);
        if (perthread->cbset) {
            ovsrcu_flush_cbset__(perthread, true);
        }
        seq_change_protected(global_seqno);
        seq_unlock();
        ovsrcu_quiesced();
        ret = 0;
    }
    return ret;
}

bool
ovsrcu_is_quiescent(void)
{
    ovsrcu_init_module();
    return pthread_getspecific(perthread_key) == NULL;
}

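/* Blocks until a full grace period has elapsed, i.e. until every registered
 * thread has quiesced at least once after this call began.  The calling
 * thread counts as quiescent for the duration.  Logs a warning, at an
 * exponentially growing interval, naming a thread that is slow to quiesce.
 * A no-op in single-threaded processes. */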
void
ovsrcu_synchronize(void)
{
    unsigned int warning_threshold = 1000;
    uint64_t target_seqno;
    long long int start;

    if (single_threaded()) {
        return;
    }

    target_seqno = seq_read(global_seqno);
    ovsrcu_quiesce_start();
    start = time_msec();

    for (;;) {
        uint64_t cur_seqno = seq_read(global_seqno);
        struct ovsrcu_perthread *perthread;
        char stalled_thread[16];
        unsigned int elapsed;
        bool done = true;

        ovs_mutex_lock(&ovsrcu_threads_mutex);
        LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) {
            if (perthread->seqno <= target_seqno) {
                ovs_strlcpy(stalled_thread, perthread->name,
                            sizeof stalled_thread);
                done = false;
                break;
            }
        }
        ovs_mutex_unlock(&ovsrcu_threads_mutex);

        if (done) {
            break;
        }

        elapsed = time_msec() - start;
        if (elapsed >= warning_threshold) {
            VLOG_WARN("blocked %u ms waiting for %s to quiesce",
                      elapsed, stalled_thread);
            warning_threshold *= 2;
        }
        poll_timer_wait_until(start + warning_threshold);

        seq_wait(global_seqno, cur_seqno);
        poll_block();
    }
    ovsrcu_quiesce_end();
}

/* Registers 'function' to be called, passing 'aux' as argument, after the
 * next grace period.
 *
 * The call is guaranteed to happen after the next time all participating
 * threads have quiesced at least once, but there is no guarantee that all
 * registered functions are called as early as possible, or that the functions
 * registered by different threads would be called in the order the
 * registrations took place.  In particular, even if two threads provably
 * register a function each in a specific order, the functions may still be
 * called in the opposite order, depending on the timing of when the threads
 * call ovsrcu_quiesce(), how many functions they postpone, and when the
 * ovs-rcu thread happens to grab the functions to be called.
 *
 * All functions registered by a single thread are guaranteed to execute in
 * the registering order, however.
 *
 * This function is more conveniently called through the ovsrcu_postpone()
 * macro, which provides a type-safe way to allow 'function''s parameter to be
 * any pointer type. */
void
ovsrcu_postpone__(void (*function)(void *aux), void *aux)
{
    struct ovsrcu_perthread *perthread = ovsrcu_perthread_get();
    struct ovsrcu_cbset *cbset;
    struct ovsrcu_cb *cb;

    cbset = perthread->cbset;
    if (!cbset) {
        cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset);
        cbset->n_cbs = 0;
    }

    cb = &cbset->cbs[cbset->n_cbs++];
    cb->function = function;
    cb->aux = aux;

    if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) {
        ovsrcu_flush_cbset(perthread);
    }
}
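
/* Illustrative sketch, not code from this file: the usual pattern is to
 * replace an RCU-protected pointer and postpone freeing the old copy until
 * readers can no longer be using it.  "flowp" and "new_flow" are
 * hypothetical:
 *
 *     OVSRCU_TYPE(struct flow *) flowp;
 *     ...
 *     struct flow *old_flow = ovsrcu_get_protected(struct flow *, &flowp);
 *     ovsrcu_set(&flowp, new_flow);
 *     ovsrcu_postpone(free, old_flow);
 */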

static bool
ovsrcu_call_postponed(void)
{
    struct ovsrcu_cbset *cbset;
    struct ovs_list cbsets;

    guarded_list_pop_all(&flushed_cbsets, &cbsets);
    if (ovs_list_is_empty(&cbsets)) {
        return false;
    }

    ovsrcu_synchronize();

    LIST_FOR_EACH_POP (cbset, list_node, &cbsets) {
        struct ovsrcu_cb *cb;

        for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) {
            cb->function(cb->aux);
        }
        free(cbset);
    }

    return true;
}

static void *
ovsrcu_postpone_thread(void *arg OVS_UNUSED)
{
    pthread_detach(pthread_self());

    for (;;) {
        uint64_t seqno = seq_read(flushed_cbsets_seq);
        if (!ovsrcu_call_postponed()) {
            seq_wait(flushed_cbsets_seq, seqno);
            poll_block();
        }
    }

    OVS_NOT_REACHED();
}

static void
ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected)
{
    struct ovsrcu_cbset *cbset = perthread->cbset;

    if (cbset) {
        guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX);
        perthread->cbset = NULL;

        if (protected) {
            seq_change_protected(flushed_cbsets_seq);
        } else {
            seq_change(flushed_cbsets_seq);
        }
    }
}

static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
    ovsrcu_flush_cbset__(perthread, false);
}

static void
ovsrcu_unregister__(struct ovsrcu_perthread *perthread)
{
    if (perthread->cbset) {
        ovsrcu_flush_cbset(perthread);
    }

    ovs_mutex_lock(&ovsrcu_threads_mutex);
    ovs_list_remove(&perthread->list_node);
    ovs_mutex_unlock(&ovsrcu_threads_mutex);

    ovs_mutex_destroy(&perthread->mutex);
    free(perthread);

    seq_change(global_seqno);
}

static void
ovsrcu_thread_exit_cb(void *perthread)
{
    ovsrcu_unregister__(perthread);
}

/* Cancels the callback to ovsrcu_thread_exit_cb().
 *
 * Cancelling the call to the destructor during main thread exit is needed
 * when using the pthreads-win32 library on Windows.  It has been observed
 * that in pthreads-win32, a call to the destructor during main thread exit
 * causes undefined behavior. */
static void
ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED)
{
    pthread_setspecific(perthread_key, NULL);
}

static void
ovsrcu_init_module(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    if (ovsthread_once_start(&once)) {
        global_seqno = seq_create();
        xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb);
        fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true);
        ovs_list_init(&ovsrcu_threads);
        ovs_mutex_init(&ovsrcu_threads_mutex);

        guarded_list_init(&flushed_cbsets);
        flushed_cbsets_seq = seq_create();

        ovsthread_once_done(&once);
    }
}