]>
Commit | Line | Data |
---|---|---|
0f2ea848 | 1 | /* |
f9ac0f03 | 2 | * Copyright (c) 2014, 2017 Nicira, Inc. |
0f2ea848 BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
9dede5cf | 18 | #include <errno.h> |
0f2ea848 | 19 | #include "ovs-rcu.h" |
d2843eba | 20 | #include "fatal-signal.h" |
0f2ea848 | 21 | #include "guarded-list.h" |
9a3cf0ac | 22 | #include "latch.h" |
b19bab5b | 23 | #include "openvswitch/list.h" |
0f2ea848 | 24 | #include "ovs-thread.h" |
fd016ae3 | 25 | #include "openvswitch/poll-loop.h" |
0f2ea848 | 26 | #include "seq.h" |
214694ad | 27 | #include "timeval.h" |
ee89ea7b | 28 | #include "util.h" |
e6211adc | 29 | #include "openvswitch/vlog.h" |
214694ad BP |
30 | |
31 | VLOG_DEFINE_THIS_MODULE(ovs_rcu); | |
0f2ea848 | 32 | |
5042815d IM |
33 | #define MIN_CBS 16 |
34 | ||
0f2ea848 BP |
/* A single postponed callback: ovsrcu_call_postponed() invokes 'function'
 * with 'aux' as its only argument. */
struct ovsrcu_cb {
    void (*function)(void *aux);    /* Callback to invoke. */
    void *aux;                      /* Argument handed to 'function'. */
};
39 | ||
/* A growable array of postponed callbacks.
 *
 * ovsrcu_postpone__() appends to the calling thread's cbset, and
 * ovsrcu_flush_cbset__() moves the whole cbset onto 'flushed_cbsets'. */
struct ovsrcu_cbset {
    struct ovs_list list_node;  /* Links this cbset into 'flushed_cbsets'. */
    struct ovsrcu_cb *cbs;      /* Heap-allocated array of callbacks. */
    size_t n_allocated;         /* Number of elements allocated in 'cbs'. */
    int n_cbs;                  /* Number of elements of 'cbs' in use. */
};
46 | ||
/* Per-thread RCU state.
 *
 * Created and registered by ovsrcu_perthread_get(), destroyed by
 * ovsrcu_unregister__(). */
struct ovsrcu_perthread {
    struct ovs_list list_node;  /* In global list. */

    struct ovs_mutex mutex;     /* Initialized and destroyed with the
                                 * structure; not locked anywhere in the code
                                 * visible here. */
    uint64_t seqno;             /* Value of 'global_seqno' last recorded by
                                 * this thread. */
    struct ovsrcu_cbset *cbset; /* Callbacks not yet flushed, or NULL. */
    char name[16];              /* This thread's name. */
};
55 | ||
/* Global sequence number.  Threads record its value in their
 * 'struct ovsrcu_perthread', and ovsrcu_synchronize() compares those recorded
 * values against a target read from it. */
static struct seq *global_seqno;

/* Thread-specific key holding the calling thread's 'struct ovsrcu_perthread',
 * or NULL while the thread is quiescent (see ovsrcu_is_quiescent()). */
static pthread_key_t perthread_key;
/* List of every registered 'struct ovsrcu_perthread'.  Accessed in this file
 * only while holding 'ovsrcu_threads_mutex'. */
static struct ovs_list ovsrcu_threads;
static struct ovs_mutex ovsrcu_threads_mutex;

/* cbsets handed off by ovsrcu_flush_cbset__(), waiting for
 * ovsrcu_call_postponed().  'flushed_cbsets_seq' is changed each time a cbset
 * is pushed. */
static struct guarded_list flushed_cbsets;
static struct seq *flushed_cbsets_seq;

/* Used by ovsrcu_exit() to ask the postpone thread to stop ('postpone_exit')
 * and to rendezvous with it ('postpone_barrier'). */
static struct latch postpone_exit;
static struct ovs_barrier postpone_barrier;

/* Forward declarations of file-local helpers defined further down. */
static void ovsrcu_init_module(void);
static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool);
static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
static void ovsrcu_unregister__(struct ovsrcu_perthread *);
static bool ovsrcu_call_postponed(void);
static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED);
0f2ea848 BP |
74 | |
75 | static struct ovsrcu_perthread * | |
76 | ovsrcu_perthread_get(void) | |
77 | { | |
78 | struct ovsrcu_perthread *perthread; | |
79 | ||
e10022d2 | 80 | ovsrcu_init_module(); |
0f2ea848 BP |
81 | |
82 | perthread = pthread_getspecific(perthread_key); | |
83 | if (!perthread) { | |
0d593ee4 BP |
84 | const char *name = get_subprogram_name(); |
85 | ||
0f2ea848 BP |
86 | perthread = xmalloc(sizeof *perthread); |
87 | ovs_mutex_init(&perthread->mutex); | |
88 | perthread->seqno = seq_read(global_seqno); | |
89 | perthread->cbset = NULL; | |
0d593ee4 | 90 | ovs_strlcpy(perthread->name, name[0] ? name : "main", |
214694ad | 91 | sizeof perthread->name); |
0f2ea848 BP |
92 | |
93 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
417e7e66 | 94 | ovs_list_push_back(&ovsrcu_threads, &perthread->list_node); |
0f2ea848 BP |
95 | ovs_mutex_unlock(&ovsrcu_threads_mutex); |
96 | ||
97 | pthread_setspecific(perthread_key, perthread); | |
98 | } | |
99 | return perthread; | |
100 | } | |
101 | ||
/* Marks the end of a quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Quiescent states neither stack nor nest: a single call ends the quiescent
 * state no matter how many times ovsrcu_quiesce_start() was called before
 * it. */
void
ovsrcu_quiesce_end(void)
{
    /* Only the registration side effect is wanted; the result is unused. */
    (void) ovsrcu_perthread_get();
}
112 | ||
113 | static void | |
114 | ovsrcu_quiesced(void) | |
115 | { | |
116 | if (single_threaded()) { | |
117 | ovsrcu_call_postponed(); | |
118 | } else { | |
119 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
120 | if (ovsthread_once_start(&once)) { | |
9a3cf0ac BP |
121 | latch_init(&postpone_exit); |
122 | ovs_barrier_init(&postpone_barrier, 2); | |
8ba0a522 | 123 | ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL); |
0f2ea848 BP |
124 | ovsthread_once_done(&once); |
125 | } | |
126 | } | |
127 | } | |
128 | ||
129 | /* Indicates the beginning of a quiescent state. See "Details" near the top of | |
130 | * ovs-rcu.h. */ | |
131 | void | |
132 | ovsrcu_quiesce_start(void) | |
133 | { | |
134 | struct ovsrcu_perthread *perthread; | |
135 | ||
e10022d2 | 136 | ovsrcu_init_module(); |
0f2ea848 BP |
137 | perthread = pthread_getspecific(perthread_key); |
138 | if (perthread) { | |
139 | pthread_setspecific(perthread_key, NULL); | |
140 | ovsrcu_unregister__(perthread); | |
141 | } | |
142 | ||
143 | ovsrcu_quiesced(); | |
144 | } | |
145 | ||
146 | /* Indicates a momentary quiescent state. See "Details" near the top of | |
6969766b JR |
147 | * ovs-rcu.h. |
148 | * | |
149 | * Provides a full memory barrier via seq_change(). | |
150 | */ | |
0f2ea848 BP |
151 | void |
152 | ovsrcu_quiesce(void) | |
153 | { | |
9c7e020f AW |
154 | struct ovsrcu_perthread *perthread; |
155 | ||
156 | perthread = ovsrcu_perthread_get(); | |
157 | perthread->seqno = seq_read(global_seqno); | |
158 | if (perthread->cbset) { | |
159 | ovsrcu_flush_cbset(perthread); | |
160 | } | |
0f2ea848 BP |
161 | seq_change(global_seqno); |
162 | ||
163 | ovsrcu_quiesced(); | |
164 | } | |
165 | ||
9dede5cf FL |
166 | int |
167 | ovsrcu_try_quiesce(void) | |
168 | { | |
169 | struct ovsrcu_perthread *perthread; | |
170 | int ret = EBUSY; | |
171 | ||
172 | ovs_assert(!single_threaded()); | |
173 | perthread = ovsrcu_perthread_get(); | |
174 | if (!seq_try_lock()) { | |
175 | perthread->seqno = seq_read_protected(global_seqno); | |
176 | if (perthread->cbset) { | |
177 | ovsrcu_flush_cbset__(perthread, true); | |
178 | } | |
179 | seq_change_protected(global_seqno); | |
180 | seq_unlock(); | |
181 | ovsrcu_quiesced(); | |
182 | ret = 0; | |
183 | } | |
184 | return ret; | |
185 | } | |
186 | ||
3308c696 BP |
187 | bool |
188 | ovsrcu_is_quiescent(void) | |
189 | { | |
e10022d2 | 190 | ovsrcu_init_module(); |
3308c696 BP |
191 | return pthread_getspecific(perthread_key) == NULL; |
192 | } | |
193 | ||
0426e67c | 194 | void |
0f2ea848 BP |
195 | ovsrcu_synchronize(void) |
196 | { | |
214694ad | 197 | unsigned int warning_threshold = 1000; |
0f2ea848 | 198 | uint64_t target_seqno; |
214694ad | 199 | long long int start; |
0f2ea848 BP |
200 | |
201 | if (single_threaded()) { | |
202 | return; | |
203 | } | |
204 | ||
205 | target_seqno = seq_read(global_seqno); | |
206 | ovsrcu_quiesce_start(); | |
214694ad | 207 | start = time_msec(); |
0f2ea848 BP |
208 | |
209 | for (;;) { | |
210 | uint64_t cur_seqno = seq_read(global_seqno); | |
211 | struct ovsrcu_perthread *perthread; | |
214694ad BP |
212 | char stalled_thread[16]; |
213 | unsigned int elapsed; | |
0f2ea848 BP |
214 | bool done = true; |
215 | ||
216 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
217 | LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) { | |
218 | if (perthread->seqno <= target_seqno) { | |
f9ac0f03 | 219 | ovs_strlcpy_arrays(stalled_thread, perthread->name); |
0f2ea848 BP |
220 | done = false; |
221 | break; | |
222 | } | |
223 | } | |
224 | ovs_mutex_unlock(&ovsrcu_threads_mutex); | |
225 | ||
226 | if (done) { | |
227 | break; | |
228 | } | |
229 | ||
214694ad BP |
230 | elapsed = time_msec() - start; |
231 | if (elapsed >= warning_threshold) { | |
232 | VLOG_WARN("blocked %u ms waiting for %s to quiesce", | |
233 | elapsed, stalled_thread); | |
234 | warning_threshold *= 2; | |
235 | } | |
236 | poll_timer_wait_until(start + warning_threshold); | |
237 | ||
0f2ea848 BP |
238 | seq_wait(global_seqno, cur_seqno); |
239 | poll_block(); | |
240 | } | |
241 | ovsrcu_quiesce_end(); | |
242 | } | |
243 | ||
9a3cf0ac BP |
244 | /* Waits until as many postponed callbacks as possible have executed. |
245 | * | |
246 | * As a side effect, stops the background thread that calls the callbacks and | |
247 | * prevents it from being restarted. This means that this function should only | |
248 | * be called soon before a process exits, as a mechanism for releasing memory | |
249 | * to make memory leaks easier to detect, since any further postponed callbacks | |
250 | * won't actually get called. | |
251 | * | |
252 | * This function can only wait for callbacks registered by the current thread | |
253 | * and the background thread that calls the callbacks. Thus, it will be most | |
254 | * effective if other threads have already exited. */ | |
255 | void | |
256 | ovsrcu_exit(void) | |
257 | { | |
258 | /* Stop the postpone thread and wait for it to exit. Otherwise, there's no | |
259 | * way to wait for that thread to finish calling callbacks itself. */ | |
260 | if (!single_threaded()) { | |
261 | ovsrcu_quiesced(); /* Ensure that the postpone thread exists. */ | |
262 | latch_set(&postpone_exit); | |
263 | ovs_barrier_block(&postpone_barrier); | |
264 | } | |
265 | ||
266 | /* Repeatedly: | |
267 | * | |
268 | * - Wait for a grace period. One important side effect is to push the | |
269 | * running thread's cbset into 'flushed_cbsets' so that the next call | |
270 | * has something to call. | |
271 | * | |
272 | * - Call all the callbacks in 'flushed_cbsets'. If there aren't any, | |
273 | * we're done, otherwise the callbacks themselves might have requested | |
274 | * more deferred callbacks so we go around again. | |
275 | * | |
276 | * We limit the number of iterations just in case some bug causes an | |
277 | * infinite loop. This function is just for making memory leaks easier to | |
278 | * spot so there's no point in breaking things on that basis. */ | |
279 | for (int i = 0; i < 8; i++) { | |
280 | ovsrcu_synchronize(); | |
281 | if (!ovsrcu_call_postponed()) { | |
282 | break; | |
283 | } | |
284 | } | |
285 | } | |
286 | ||
0f2ea848 BP |
287 | /* Registers 'function' to be called, passing 'aux' as argument, after the |
288 | * next grace period. | |
289 | * | |
2541d759 JR |
290 | * The call is guaranteed to happen after the next time all participating |
291 | * threads have quiesced at least once, but there is no quarantee that all | |
292 | * registered functions are called as early as possible, or that the functions | |
293 | * registered by different threads would be called in the order the | |
294 | * registrations took place. In particular, even if two threads provably | |
295 | * register a function each in a specific order, the functions may still be | |
296 | * called in the opposite order, depending on the timing of when the threads | |
297 | * call ovsrcu_quiesce(), how many functions they postpone, and when the | |
298 | * ovs-rcu thread happens to grab the functions to be called. | |
299 | * | |
300 | * All functions registered by a single thread are guaranteed to execute in the | |
301 | * registering order, however. | |
302 | * | |
0f2ea848 BP |
303 | * This function is more conveniently called through the ovsrcu_postpone() |
304 | * macro, which provides a type-safe way to allow 'function''s parameter to be | |
305 | * any pointer type. */ | |
306 | void | |
307 | ovsrcu_postpone__(void (*function)(void *aux), void *aux) | |
308 | { | |
309 | struct ovsrcu_perthread *perthread = ovsrcu_perthread_get(); | |
310 | struct ovsrcu_cbset *cbset; | |
311 | struct ovsrcu_cb *cb; | |
312 | ||
313 | cbset = perthread->cbset; | |
314 | if (!cbset) { | |
315 | cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset); | |
5042815d IM |
316 | cbset->cbs = xmalloc(MIN_CBS * sizeof *cbset->cbs); |
317 | cbset->n_allocated = MIN_CBS; | |
0f2ea848 BP |
318 | cbset->n_cbs = 0; |
319 | } | |
320 | ||
5042815d IM |
321 | if (cbset->n_cbs == cbset->n_allocated) { |
322 | cbset->cbs = x2nrealloc(cbset->cbs, &cbset->n_allocated, | |
323 | sizeof *cbset->cbs); | |
324 | } | |
325 | ||
0f2ea848 BP |
326 | cb = &cbset->cbs[cbset->n_cbs++]; |
327 | cb->function = function; | |
328 | cb->aux = aux; | |
0f2ea848 BP |
329 | } |
330 | ||
331 | static bool | |
332 | ovsrcu_call_postponed(void) | |
333 | { | |
5f03c983 | 334 | struct ovsrcu_cbset *cbset; |
ca6ba700 | 335 | struct ovs_list cbsets; |
0f2ea848 BP |
336 | |
337 | guarded_list_pop_all(&flushed_cbsets, &cbsets); | |
417e7e66 | 338 | if (ovs_list_is_empty(&cbsets)) { |
0f2ea848 BP |
339 | return false; |
340 | } | |
341 | ||
342 | ovsrcu_synchronize(); | |
343 | ||
5f03c983 | 344 | LIST_FOR_EACH_POP (cbset, list_node, &cbsets) { |
0f2ea848 BP |
345 | struct ovsrcu_cb *cb; |
346 | ||
347 | for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) { | |
348 | cb->function(cb->aux); | |
349 | } | |
5042815d | 350 | free(cbset->cbs); |
0f2ea848 BP |
351 | free(cbset); |
352 | } | |
353 | ||
354 | return true; | |
355 | } | |
356 | ||
357 | static void * | |
358 | ovsrcu_postpone_thread(void *arg OVS_UNUSED) | |
359 | { | |
360 | pthread_detach(pthread_self()); | |
361 | ||
9a3cf0ac | 362 | while (!latch_is_set(&postpone_exit)) { |
0f2ea848 BP |
363 | uint64_t seqno = seq_read(flushed_cbsets_seq); |
364 | if (!ovsrcu_call_postponed()) { | |
365 | seq_wait(flushed_cbsets_seq, seqno); | |
9a3cf0ac | 366 | latch_wait(&postpone_exit); |
0f2ea848 BP |
367 | poll_block(); |
368 | } | |
369 | } | |
370 | ||
9a3cf0ac BP |
371 | ovs_barrier_block(&postpone_barrier); |
372 | return NULL; | |
0f2ea848 BP |
373 | } |
374 | ||
375 | static void | |
9dede5cf | 376 | ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected) |
0f2ea848 BP |
377 | { |
378 | struct ovsrcu_cbset *cbset = perthread->cbset; | |
379 | ||
380 | if (cbset) { | |
381 | guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX); | |
382 | perthread->cbset = NULL; | |
383 | ||
9dede5cf FL |
384 | if (protected) { |
385 | seq_change_protected(flushed_cbsets_seq); | |
386 | } else { | |
387 | seq_change(flushed_cbsets_seq); | |
388 | } | |
0f2ea848 BP |
389 | } |
390 | } | |
391 | ||
9dede5cf FL |
/* Flushes 'perthread''s pending cbset through the seq_change() path of
 * ovsrcu_flush_cbset__(), not the seq_change_protected() one. */
static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
    const bool seq_locked = false;

    ovsrcu_flush_cbset__(perthread, seq_locked);
}
397 | ||
0f2ea848 BP |
398 | static void |
399 | ovsrcu_unregister__(struct ovsrcu_perthread *perthread) | |
400 | { | |
401 | if (perthread->cbset) { | |
402 | ovsrcu_flush_cbset(perthread); | |
403 | } | |
404 | ||
405 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
417e7e66 | 406 | ovs_list_remove(&perthread->list_node); |
0f2ea848 BP |
407 | ovs_mutex_unlock(&ovsrcu_threads_mutex); |
408 | ||
409 | ovs_mutex_destroy(&perthread->mutex); | |
410 | free(perthread); | |
411 | ||
412 | seq_change(global_seqno); | |
413 | } | |
414 | ||
/* Destructor registered for 'perthread_key' in ovsrcu_init_module().
 * 'perthread' is the 'struct ovsrcu_perthread' stored under the key; it is
 * unregistered and freed. */
static void
ovsrcu_thread_exit_cb(void *perthread)
{
    struct ovsrcu_perthread *exiting = perthread;

    ovsrcu_unregister__(exiting);
}
420 | ||
d2843eba GS |
421 | /* Cancels the callback to ovsrcu_thread_exit_cb(). |
422 | * | |
423 | * Cancelling the call to the destructor during the main thread exit | |
424 | * is needed while using pthreads-win32 library in Windows. It has been | |
425 | * observed that in pthreads-win32, a call to the destructor during | |
426 | * main thread exit causes undefined behavior. */ | |
427 | static void | |
428 | ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED) | |
429 | { | |
430 | pthread_setspecific(perthread_key, NULL); | |
431 | } | |
432 | ||
0f2ea848 | 433 | static void |
e10022d2 | 434 | ovsrcu_init_module(void) |
0f2ea848 BP |
435 | { |
436 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
437 | if (ovsthread_once_start(&once)) { | |
438 | global_seqno = seq_create(); | |
439 | xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb); | |
d2843eba | 440 | fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true); |
417e7e66 | 441 | ovs_list_init(&ovsrcu_threads); |
0f2ea848 BP |
442 | ovs_mutex_init(&ovsrcu_threads_mutex); |
443 | ||
444 | guarded_list_init(&flushed_cbsets); | |
445 | flushed_cbsets_seq = seq_create(); | |
446 | ||
447 | ovsthread_once_done(&once); | |
448 | } | |
449 | } |