]>
Commit | Line | Data |
---|---|---|
0f2ea848 | 1 | /* |
f9ac0f03 | 2 | * Copyright (c) 2014, 2017 Nicira, Inc. |
0f2ea848 BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
9dede5cf | 18 | #include <errno.h> |
0f2ea848 | 19 | #include "ovs-rcu.h" |
d2843eba | 20 | #include "fatal-signal.h" |
0f2ea848 | 21 | #include "guarded-list.h" |
9a3cf0ac | 22 | #include "latch.h" |
b19bab5b | 23 | #include "openvswitch/list.h" |
0f2ea848 | 24 | #include "ovs-thread.h" |
fd016ae3 | 25 | #include "openvswitch/poll-loop.h" |
0f2ea848 | 26 | #include "seq.h" |
214694ad | 27 | #include "timeval.h" |
ee89ea7b | 28 | #include "util.h" |
e6211adc | 29 | #include "openvswitch/vlog.h" |
214694ad BP |
30 | |
31 | VLOG_DEFINE_THIS_MODULE(ovs_rcu); | |
0f2ea848 BP |
32 | |
/* One postponed callback: 'function' is later invoked with 'aux' as its sole
 * argument. */
struct ovsrcu_cb {
    void (*function)(void *aux);
    void *aux;
};
37 | ||
38 | struct ovsrcu_cbset { | |
ca6ba700 | 39 | struct ovs_list list_node; |
0f2ea848 BP |
40 | struct ovsrcu_cb cbs[16]; |
41 | int n_cbs; | |
42 | }; | |
43 | ||
44 | struct ovsrcu_perthread { | |
ca6ba700 | 45 | struct ovs_list list_node; /* In global list. */ |
0f2ea848 BP |
46 | |
47 | struct ovs_mutex mutex; | |
48 | uint64_t seqno; | |
49 | struct ovsrcu_cbset *cbset; | |
214694ad | 50 | char name[16]; /* This thread's name. */ |
0f2ea848 BP |
51 | }; |
52 | ||
53 | static struct seq *global_seqno; | |
54 | ||
55 | static pthread_key_t perthread_key; | |
ca6ba700 | 56 | static struct ovs_list ovsrcu_threads; |
0f2ea848 BP |
57 | static struct ovs_mutex ovsrcu_threads_mutex; |
58 | ||
59 | static struct guarded_list flushed_cbsets; | |
60 | static struct seq *flushed_cbsets_seq; | |
61 | ||
9a3cf0ac BP |
62 | static struct latch postpone_exit; |
63 | static struct ovs_barrier postpone_barrier; | |
64 | ||
e10022d2 | 65 | static void ovsrcu_init_module(void); |
9dede5cf | 66 | static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool); |
0f2ea848 BP |
67 | static void ovsrcu_flush_cbset(struct ovsrcu_perthread *); |
68 | static void ovsrcu_unregister__(struct ovsrcu_perthread *); | |
69 | static bool ovsrcu_call_postponed(void); | |
70 | static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED); | |
0f2ea848 BP |
71 | |
72 | static struct ovsrcu_perthread * | |
73 | ovsrcu_perthread_get(void) | |
74 | { | |
75 | struct ovsrcu_perthread *perthread; | |
76 | ||
e10022d2 | 77 | ovsrcu_init_module(); |
0f2ea848 BP |
78 | |
79 | perthread = pthread_getspecific(perthread_key); | |
80 | if (!perthread) { | |
0d593ee4 BP |
81 | const char *name = get_subprogram_name(); |
82 | ||
0f2ea848 BP |
83 | perthread = xmalloc(sizeof *perthread); |
84 | ovs_mutex_init(&perthread->mutex); | |
85 | perthread->seqno = seq_read(global_seqno); | |
86 | perthread->cbset = NULL; | |
0d593ee4 | 87 | ovs_strlcpy(perthread->name, name[0] ? name : "main", |
214694ad | 88 | sizeof perthread->name); |
0f2ea848 BP |
89 | |
90 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
417e7e66 | 91 | ovs_list_push_back(&ovsrcu_threads, &perthread->list_node); |
0f2ea848 BP |
92 | ovs_mutex_unlock(&ovsrcu_threads_mutex); |
93 | ||
94 | pthread_setspecific(perthread_key, perthread); | |
95 | } | |
96 | return perthread; | |
97 | } | |
98 | ||
/* Marks the end of a quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Quiescent states neither stack nor nest: one call here ends the quiescent
 * state no matter how many times ovsrcu_quiesce_start() was called before
 * it. */
void
ovsrcu_quiesce_end(void)
{
    /* Obtaining the per-thread record is what registers the thread again;
     * the returned pointer itself is not needed. */
    (void) ovsrcu_perthread_get();
}
109 | ||
110 | static void | |
111 | ovsrcu_quiesced(void) | |
112 | { | |
113 | if (single_threaded()) { | |
114 | ovsrcu_call_postponed(); | |
115 | } else { | |
116 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
117 | if (ovsthread_once_start(&once)) { | |
9a3cf0ac BP |
118 | latch_init(&postpone_exit); |
119 | ovs_barrier_init(&postpone_barrier, 2); | |
8ba0a522 | 120 | ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL); |
0f2ea848 BP |
121 | ovsthread_once_done(&once); |
122 | } | |
123 | } | |
124 | } | |
125 | ||
126 | /* Indicates the beginning of a quiescent state. See "Details" near the top of | |
127 | * ovs-rcu.h. */ | |
128 | void | |
129 | ovsrcu_quiesce_start(void) | |
130 | { | |
131 | struct ovsrcu_perthread *perthread; | |
132 | ||
e10022d2 | 133 | ovsrcu_init_module(); |
0f2ea848 BP |
134 | perthread = pthread_getspecific(perthread_key); |
135 | if (perthread) { | |
136 | pthread_setspecific(perthread_key, NULL); | |
137 | ovsrcu_unregister__(perthread); | |
138 | } | |
139 | ||
140 | ovsrcu_quiesced(); | |
141 | } | |
142 | ||
143 | /* Indicates a momentary quiescent state. See "Details" near the top of | |
6969766b JR |
144 | * ovs-rcu.h. |
145 | * | |
146 | * Provides a full memory barrier via seq_change(). | |
147 | */ | |
0f2ea848 BP |
148 | void |
149 | ovsrcu_quiesce(void) | |
150 | { | |
9c7e020f AW |
151 | struct ovsrcu_perthread *perthread; |
152 | ||
153 | perthread = ovsrcu_perthread_get(); | |
154 | perthread->seqno = seq_read(global_seqno); | |
155 | if (perthread->cbset) { | |
156 | ovsrcu_flush_cbset(perthread); | |
157 | } | |
0f2ea848 BP |
158 | seq_change(global_seqno); |
159 | ||
160 | ovsrcu_quiesced(); | |
161 | } | |
162 | ||
9dede5cf FL |
163 | int |
164 | ovsrcu_try_quiesce(void) | |
165 | { | |
166 | struct ovsrcu_perthread *perthread; | |
167 | int ret = EBUSY; | |
168 | ||
169 | ovs_assert(!single_threaded()); | |
170 | perthread = ovsrcu_perthread_get(); | |
171 | if (!seq_try_lock()) { | |
172 | perthread->seqno = seq_read_protected(global_seqno); | |
173 | if (perthread->cbset) { | |
174 | ovsrcu_flush_cbset__(perthread, true); | |
175 | } | |
176 | seq_change_protected(global_seqno); | |
177 | seq_unlock(); | |
178 | ovsrcu_quiesced(); | |
179 | ret = 0; | |
180 | } | |
181 | return ret; | |
182 | } | |
183 | ||
3308c696 BP |
184 | bool |
185 | ovsrcu_is_quiescent(void) | |
186 | { | |
e10022d2 | 187 | ovsrcu_init_module(); |
3308c696 BP |
188 | return pthread_getspecific(perthread_key) == NULL; |
189 | } | |
190 | ||
0426e67c | 191 | void |
0f2ea848 BP |
192 | ovsrcu_synchronize(void) |
193 | { | |
214694ad | 194 | unsigned int warning_threshold = 1000; |
0f2ea848 | 195 | uint64_t target_seqno; |
214694ad | 196 | long long int start; |
0f2ea848 BP |
197 | |
198 | if (single_threaded()) { | |
199 | return; | |
200 | } | |
201 | ||
202 | target_seqno = seq_read(global_seqno); | |
203 | ovsrcu_quiesce_start(); | |
214694ad | 204 | start = time_msec(); |
0f2ea848 BP |
205 | |
206 | for (;;) { | |
207 | uint64_t cur_seqno = seq_read(global_seqno); | |
208 | struct ovsrcu_perthread *perthread; | |
214694ad BP |
209 | char stalled_thread[16]; |
210 | unsigned int elapsed; | |
0f2ea848 BP |
211 | bool done = true; |
212 | ||
213 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
214 | LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) { | |
215 | if (perthread->seqno <= target_seqno) { | |
f9ac0f03 | 216 | ovs_strlcpy_arrays(stalled_thread, perthread->name); |
0f2ea848 BP |
217 | done = false; |
218 | break; | |
219 | } | |
220 | } | |
221 | ovs_mutex_unlock(&ovsrcu_threads_mutex); | |
222 | ||
223 | if (done) { | |
224 | break; | |
225 | } | |
226 | ||
214694ad BP |
227 | elapsed = time_msec() - start; |
228 | if (elapsed >= warning_threshold) { | |
229 | VLOG_WARN("blocked %u ms waiting for %s to quiesce", | |
230 | elapsed, stalled_thread); | |
231 | warning_threshold *= 2; | |
232 | } | |
233 | poll_timer_wait_until(start + warning_threshold); | |
234 | ||
0f2ea848 BP |
235 | seq_wait(global_seqno, cur_seqno); |
236 | poll_block(); | |
237 | } | |
238 | ovsrcu_quiesce_end(); | |
239 | } | |
240 | ||
9a3cf0ac BP |
241 | /* Waits until as many postponed callbacks as possible have executed. |
242 | * | |
243 | * As a side effect, stops the background thread that calls the callbacks and | |
244 | * prevents it from being restarted. This means that this function should only | |
245 | * be called soon before a process exits, as a mechanism for releasing memory | |
246 | * to make memory leaks easier to detect, since any further postponed callbacks | |
247 | * won't actually get called. | |
248 | * | |
249 | * This function can only wait for callbacks registered by the current thread | |
250 | * and the background thread that calls the callbacks. Thus, it will be most | |
251 | * effective if other threads have already exited. */ | |
252 | void | |
253 | ovsrcu_exit(void) | |
254 | { | |
255 | /* Stop the postpone thread and wait for it to exit. Otherwise, there's no | |
256 | * way to wait for that thread to finish calling callbacks itself. */ | |
257 | if (!single_threaded()) { | |
258 | ovsrcu_quiesced(); /* Ensure that the postpone thread exists. */ | |
259 | latch_set(&postpone_exit); | |
260 | ovs_barrier_block(&postpone_barrier); | |
261 | } | |
262 | ||
263 | /* Repeatedly: | |
264 | * | |
265 | * - Wait for a grace period. One important side effect is to push the | |
266 | * running thread's cbset into 'flushed_cbsets' so that the next call | |
267 | * has something to call. | |
268 | * | |
269 | * - Call all the callbacks in 'flushed_cbsets'. If there aren't any, | |
270 | * we're done, otherwise the callbacks themselves might have requested | |
271 | * more deferred callbacks so we go around again. | |
272 | * | |
273 | * We limit the number of iterations just in case some bug causes an | |
274 | * infinite loop. This function is just for making memory leaks easier to | |
275 | * spot so there's no point in breaking things on that basis. */ | |
276 | for (int i = 0; i < 8; i++) { | |
277 | ovsrcu_synchronize(); | |
278 | if (!ovsrcu_call_postponed()) { | |
279 | break; | |
280 | } | |
281 | } | |
282 | } | |
283 | ||
0f2ea848 BP |
284 | /* Registers 'function' to be called, passing 'aux' as argument, after the |
285 | * next grace period. | |
286 | * | |
2541d759 JR |
287 | * The call is guaranteed to happen after the next time all participating |
288 | * threads have quiesced at least once, but there is no quarantee that all | |
289 | * registered functions are called as early as possible, or that the functions | |
290 | * registered by different threads would be called in the order the | |
291 | * registrations took place. In particular, even if two threads provably | |
292 | * register a function each in a specific order, the functions may still be | |
293 | * called in the opposite order, depending on the timing of when the threads | |
294 | * call ovsrcu_quiesce(), how many functions they postpone, and when the | |
295 | * ovs-rcu thread happens to grab the functions to be called. | |
296 | * | |
297 | * All functions registered by a single thread are guaranteed to execute in the | |
298 | * registering order, however. | |
299 | * | |
0f2ea848 BP |
300 | * This function is more conveniently called through the ovsrcu_postpone() |
301 | * macro, which provides a type-safe way to allow 'function''s parameter to be | |
302 | * any pointer type. */ | |
303 | void | |
304 | ovsrcu_postpone__(void (*function)(void *aux), void *aux) | |
305 | { | |
306 | struct ovsrcu_perthread *perthread = ovsrcu_perthread_get(); | |
307 | struct ovsrcu_cbset *cbset; | |
308 | struct ovsrcu_cb *cb; | |
309 | ||
310 | cbset = perthread->cbset; | |
311 | if (!cbset) { | |
312 | cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset); | |
313 | cbset->n_cbs = 0; | |
314 | } | |
315 | ||
316 | cb = &cbset->cbs[cbset->n_cbs++]; | |
317 | cb->function = function; | |
318 | cb->aux = aux; | |
319 | ||
320 | if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) { | |
321 | ovsrcu_flush_cbset(perthread); | |
322 | } | |
323 | } | |
324 | ||
325 | static bool | |
326 | ovsrcu_call_postponed(void) | |
327 | { | |
5f03c983 | 328 | struct ovsrcu_cbset *cbset; |
ca6ba700 | 329 | struct ovs_list cbsets; |
0f2ea848 BP |
330 | |
331 | guarded_list_pop_all(&flushed_cbsets, &cbsets); | |
417e7e66 | 332 | if (ovs_list_is_empty(&cbsets)) { |
0f2ea848 BP |
333 | return false; |
334 | } | |
335 | ||
336 | ovsrcu_synchronize(); | |
337 | ||
5f03c983 | 338 | LIST_FOR_EACH_POP (cbset, list_node, &cbsets) { |
0f2ea848 BP |
339 | struct ovsrcu_cb *cb; |
340 | ||
341 | for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) { | |
342 | cb->function(cb->aux); | |
343 | } | |
0f2ea848 BP |
344 | free(cbset); |
345 | } | |
346 | ||
347 | return true; | |
348 | } | |
349 | ||
350 | static void * | |
351 | ovsrcu_postpone_thread(void *arg OVS_UNUSED) | |
352 | { | |
353 | pthread_detach(pthread_self()); | |
354 | ||
9a3cf0ac | 355 | while (!latch_is_set(&postpone_exit)) { |
0f2ea848 BP |
356 | uint64_t seqno = seq_read(flushed_cbsets_seq); |
357 | if (!ovsrcu_call_postponed()) { | |
358 | seq_wait(flushed_cbsets_seq, seqno); | |
9a3cf0ac | 359 | latch_wait(&postpone_exit); |
0f2ea848 BP |
360 | poll_block(); |
361 | } | |
362 | } | |
363 | ||
9a3cf0ac BP |
364 | ovs_barrier_block(&postpone_barrier); |
365 | return NULL; | |
0f2ea848 BP |
366 | } |
367 | ||
368 | static void | |
9dede5cf | 369 | ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected) |
0f2ea848 BP |
370 | { |
371 | struct ovsrcu_cbset *cbset = perthread->cbset; | |
372 | ||
373 | if (cbset) { | |
374 | guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX); | |
375 | perthread->cbset = NULL; | |
376 | ||
9dede5cf FL |
377 | if (protected) { |
378 | seq_change_protected(flushed_cbsets_seq); | |
379 | } else { | |
380 | seq_change(flushed_cbsets_seq); | |
381 | } | |
0f2ea848 BP |
382 | } |
383 | } | |
384 | ||
9dede5cf FL |
/* Flushes 'perthread''s pending callback batch through the unprotected
 * variant of ovsrcu_flush_cbset__(). */
static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
    const bool protected = false;

    ovsrcu_flush_cbset__(perthread, protected);
}
390 | ||
0f2ea848 BP |
391 | static void |
392 | ovsrcu_unregister__(struct ovsrcu_perthread *perthread) | |
393 | { | |
394 | if (perthread->cbset) { | |
395 | ovsrcu_flush_cbset(perthread); | |
396 | } | |
397 | ||
398 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
417e7e66 | 399 | ovs_list_remove(&perthread->list_node); |
0f2ea848 BP |
400 | ovs_mutex_unlock(&ovsrcu_threads_mutex); |
401 | ||
402 | ovs_mutex_destroy(&perthread->mutex); | |
403 | free(perthread); | |
404 | ||
405 | seq_change(global_seqno); | |
406 | } | |
407 | ||
/* Destructor registered for 'perthread_key' by ovsrcu_init_module().
 * 'perthread' is the ovsrcu_perthread stored in the key; it is
 * unregistered and freed. */
static void
ovsrcu_thread_exit_cb(void *perthread)
{
    struct ovsrcu_perthread *exiting = perthread;

    ovsrcu_unregister__(exiting);
}
413 | ||
d2843eba GS |
414 | /* Cancels the callback to ovsrcu_thread_exit_cb(). |
415 | * | |
416 | * Cancelling the call to the destructor during the main thread exit | |
417 | * is needed while using pthreads-win32 library in Windows. It has been | |
418 | * observed that in pthreads-win32, a call to the destructor during | |
419 | * main thread exit causes undefined behavior. */ | |
420 | static void | |
421 | ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED) | |
422 | { | |
423 | pthread_setspecific(perthread_key, NULL); | |
424 | } | |
425 | ||
0f2ea848 | 426 | static void |
e10022d2 | 427 | ovsrcu_init_module(void) |
0f2ea848 BP |
428 | { |
429 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
430 | if (ovsthread_once_start(&once)) { | |
431 | global_seqno = seq_create(); | |
432 | xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb); | |
d2843eba | 433 | fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true); |
417e7e66 | 434 | ovs_list_init(&ovsrcu_threads); |
0f2ea848 BP |
435 | ovs_mutex_init(&ovsrcu_threads_mutex); |
436 | ||
437 | guarded_list_init(&flushed_cbsets); | |
438 | flushed_cbsets_seq = seq_create(); | |
439 | ||
440 | ovsthread_once_done(&once); | |
441 | } | |
442 | } |