]>
Commit | Line | Data |
---|---|---|
0f2ea848 BP |
1 | /* |
2 | * Copyright (c) 2014 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
9dede5cf | 18 | #include <errno.h> |
0f2ea848 | 19 | #include "ovs-rcu.h" |
d2843eba | 20 | #include "fatal-signal.h" |
0f2ea848 | 21 | #include "guarded-list.h" |
b19bab5b | 22 | #include "openvswitch/list.h" |
0f2ea848 BP |
23 | #include "ovs-thread.h" |
24 | #include "poll-loop.h" | |
25 | #include "seq.h" | |
214694ad | 26 | #include "timeval.h" |
ee89ea7b | 27 | #include "util.h" |
e6211adc | 28 | #include "openvswitch/vlog.h" |
214694ad BP |
29 | |
30 | VLOG_DEFINE_THIS_MODULE(ovs_rcu); | |
0f2ea848 BP |
31 | |
/* One postponed callback: 'function' is eventually invoked with 'aux' as its
 * sole argument. */
struct ovsrcu_cb {
    void (*function)(void *aux);    /* Routine to call. */
    void *aux;                      /* Argument passed to 'function'. */
};
36 | ||
37 | struct ovsrcu_cbset { | |
ca6ba700 | 38 | struct ovs_list list_node; |
0f2ea848 BP |
39 | struct ovsrcu_cb cbs[16]; |
40 | int n_cbs; | |
41 | }; | |
42 | ||
43 | struct ovsrcu_perthread { | |
ca6ba700 | 44 | struct ovs_list list_node; /* In global list. */ |
0f2ea848 BP |
45 | |
46 | struct ovs_mutex mutex; | |
47 | uint64_t seqno; | |
48 | struct ovsrcu_cbset *cbset; | |
214694ad | 49 | char name[16]; /* This thread's name. */ |
0f2ea848 BP |
50 | }; |
51 | ||
52 | static struct seq *global_seqno; | |
53 | ||
54 | static pthread_key_t perthread_key; | |
ca6ba700 | 55 | static struct ovs_list ovsrcu_threads; |
0f2ea848 BP |
56 | static struct ovs_mutex ovsrcu_threads_mutex; |
57 | ||
58 | static struct guarded_list flushed_cbsets; | |
59 | static struct seq *flushed_cbsets_seq; | |
60 | ||
e10022d2 | 61 | static void ovsrcu_init_module(void); |
9dede5cf | 62 | static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool); |
0f2ea848 BP |
63 | static void ovsrcu_flush_cbset(struct ovsrcu_perthread *); |
64 | static void ovsrcu_unregister__(struct ovsrcu_perthread *); | |
65 | static bool ovsrcu_call_postponed(void); | |
66 | static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED); | |
0f2ea848 BP |
67 | |
68 | static struct ovsrcu_perthread * | |
69 | ovsrcu_perthread_get(void) | |
70 | { | |
71 | struct ovsrcu_perthread *perthread; | |
72 | ||
e10022d2 | 73 | ovsrcu_init_module(); |
0f2ea848 BP |
74 | |
75 | perthread = pthread_getspecific(perthread_key); | |
76 | if (!perthread) { | |
0d593ee4 BP |
77 | const char *name = get_subprogram_name(); |
78 | ||
0f2ea848 BP |
79 | perthread = xmalloc(sizeof *perthread); |
80 | ovs_mutex_init(&perthread->mutex); | |
81 | perthread->seqno = seq_read(global_seqno); | |
82 | perthread->cbset = NULL; | |
0d593ee4 | 83 | ovs_strlcpy(perthread->name, name[0] ? name : "main", |
214694ad | 84 | sizeof perthread->name); |
0f2ea848 BP |
85 | |
86 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
417e7e66 | 87 | ovs_list_push_back(&ovsrcu_threads, &perthread->list_node); |
0f2ea848 BP |
88 | ovs_mutex_unlock(&ovsrcu_threads_mutex); |
89 | ||
90 | pthread_setspecific(perthread_key, perthread); | |
91 | } | |
92 | return perthread; | |
93 | } | |
94 | ||
/* Marks the end of a quiescent state.  See "Details" near the top of
 * ovs-rcu.h.
 *
 * Quiescent states neither stack nor nest: a single call ends the quiescent
 * state no matter how many times ovsrcu_quiesce_start() was called
 * beforehand. */
void
ovsrcu_quiesce_end(void)
{
    /* Obtaining the per-thread record (re)creates it if needed; the record
     * itself is not used here. */
    (void) ovsrcu_perthread_get();
}
105 | ||
106 | static void | |
107 | ovsrcu_quiesced(void) | |
108 | { | |
109 | if (single_threaded()) { | |
110 | ovsrcu_call_postponed(); | |
111 | } else { | |
112 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
113 | if (ovsthread_once_start(&once)) { | |
8ba0a522 | 114 | ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL); |
0f2ea848 BP |
115 | ovsthread_once_done(&once); |
116 | } | |
117 | } | |
118 | } | |
119 | ||
120 | /* Indicates the beginning of a quiescent state. See "Details" near the top of | |
121 | * ovs-rcu.h. */ | |
122 | void | |
123 | ovsrcu_quiesce_start(void) | |
124 | { | |
125 | struct ovsrcu_perthread *perthread; | |
126 | ||
e10022d2 | 127 | ovsrcu_init_module(); |
0f2ea848 BP |
128 | perthread = pthread_getspecific(perthread_key); |
129 | if (perthread) { | |
130 | pthread_setspecific(perthread_key, NULL); | |
131 | ovsrcu_unregister__(perthread); | |
132 | } | |
133 | ||
134 | ovsrcu_quiesced(); | |
135 | } | |
136 | ||
137 | /* Indicates a momentary quiescent state. See "Details" near the top of | |
6969766b JR |
138 | * ovs-rcu.h. |
139 | * | |
140 | * Provides a full memory barrier via seq_change(). | |
141 | */ | |
0f2ea848 BP |
142 | void |
143 | ovsrcu_quiesce(void) | |
144 | { | |
9c7e020f AW |
145 | struct ovsrcu_perthread *perthread; |
146 | ||
147 | perthread = ovsrcu_perthread_get(); | |
148 | perthread->seqno = seq_read(global_seqno); | |
149 | if (perthread->cbset) { | |
150 | ovsrcu_flush_cbset(perthread); | |
151 | } | |
0f2ea848 BP |
152 | seq_change(global_seqno); |
153 | ||
154 | ovsrcu_quiesced(); | |
155 | } | |
156 | ||
9dede5cf FL |
157 | int |
158 | ovsrcu_try_quiesce(void) | |
159 | { | |
160 | struct ovsrcu_perthread *perthread; | |
161 | int ret = EBUSY; | |
162 | ||
163 | ovs_assert(!single_threaded()); | |
164 | perthread = ovsrcu_perthread_get(); | |
165 | if (!seq_try_lock()) { | |
166 | perthread->seqno = seq_read_protected(global_seqno); | |
167 | if (perthread->cbset) { | |
168 | ovsrcu_flush_cbset__(perthread, true); | |
169 | } | |
170 | seq_change_protected(global_seqno); | |
171 | seq_unlock(); | |
172 | ovsrcu_quiesced(); | |
173 | ret = 0; | |
174 | } | |
175 | return ret; | |
176 | } | |
177 | ||
3308c696 BP |
178 | bool |
179 | ovsrcu_is_quiescent(void) | |
180 | { | |
e10022d2 | 181 | ovsrcu_init_module(); |
3308c696 BP |
182 | return pthread_getspecific(perthread_key) == NULL; |
183 | } | |
184 | ||
0426e67c | 185 | void |
0f2ea848 BP |
186 | ovsrcu_synchronize(void) |
187 | { | |
214694ad | 188 | unsigned int warning_threshold = 1000; |
0f2ea848 | 189 | uint64_t target_seqno; |
214694ad | 190 | long long int start; |
0f2ea848 BP |
191 | |
192 | if (single_threaded()) { | |
193 | return; | |
194 | } | |
195 | ||
196 | target_seqno = seq_read(global_seqno); | |
197 | ovsrcu_quiesce_start(); | |
214694ad | 198 | start = time_msec(); |
0f2ea848 BP |
199 | |
200 | for (;;) { | |
201 | uint64_t cur_seqno = seq_read(global_seqno); | |
202 | struct ovsrcu_perthread *perthread; | |
214694ad BP |
203 | char stalled_thread[16]; |
204 | unsigned int elapsed; | |
0f2ea848 BP |
205 | bool done = true; |
206 | ||
207 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
208 | LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) { | |
209 | if (perthread->seqno <= target_seqno) { | |
214694ad BP |
210 | ovs_strlcpy(stalled_thread, perthread->name, |
211 | sizeof stalled_thread); | |
0f2ea848 BP |
212 | done = false; |
213 | break; | |
214 | } | |
215 | } | |
216 | ovs_mutex_unlock(&ovsrcu_threads_mutex); | |
217 | ||
218 | if (done) { | |
219 | break; | |
220 | } | |
221 | ||
214694ad BP |
222 | elapsed = time_msec() - start; |
223 | if (elapsed >= warning_threshold) { | |
224 | VLOG_WARN("blocked %u ms waiting for %s to quiesce", | |
225 | elapsed, stalled_thread); | |
226 | warning_threshold *= 2; | |
227 | } | |
228 | poll_timer_wait_until(start + warning_threshold); | |
229 | ||
0f2ea848 BP |
230 | seq_wait(global_seqno, cur_seqno); |
231 | poll_block(); | |
232 | } | |
233 | ovsrcu_quiesce_end(); | |
234 | } | |
235 | ||
236 | /* Registers 'function' to be called, passing 'aux' as argument, after the | |
237 | * next grace period. | |
238 | * | |
2541d759 JR |
239 | * The call is guaranteed to happen after the next time all participating |
240 | * threads have quiesced at least once, but there is no quarantee that all | |
241 | * registered functions are called as early as possible, or that the functions | |
242 | * registered by different threads would be called in the order the | |
243 | * registrations took place. In particular, even if two threads provably | |
244 | * register a function each in a specific order, the functions may still be | |
245 | * called in the opposite order, depending on the timing of when the threads | |
246 | * call ovsrcu_quiesce(), how many functions they postpone, and when the | |
247 | * ovs-rcu thread happens to grab the functions to be called. | |
248 | * | |
249 | * All functions registered by a single thread are guaranteed to execute in the | |
250 | * registering order, however. | |
251 | * | |
0f2ea848 BP |
252 | * This function is more conveniently called through the ovsrcu_postpone() |
253 | * macro, which provides a type-safe way to allow 'function''s parameter to be | |
254 | * any pointer type. */ | |
255 | void | |
256 | ovsrcu_postpone__(void (*function)(void *aux), void *aux) | |
257 | { | |
258 | struct ovsrcu_perthread *perthread = ovsrcu_perthread_get(); | |
259 | struct ovsrcu_cbset *cbset; | |
260 | struct ovsrcu_cb *cb; | |
261 | ||
262 | cbset = perthread->cbset; | |
263 | if (!cbset) { | |
264 | cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset); | |
265 | cbset->n_cbs = 0; | |
266 | } | |
267 | ||
268 | cb = &cbset->cbs[cbset->n_cbs++]; | |
269 | cb->function = function; | |
270 | cb->aux = aux; | |
271 | ||
272 | if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) { | |
273 | ovsrcu_flush_cbset(perthread); | |
274 | } | |
275 | } | |
276 | ||
277 | static bool | |
278 | ovsrcu_call_postponed(void) | |
279 | { | |
5f03c983 | 280 | struct ovsrcu_cbset *cbset; |
ca6ba700 | 281 | struct ovs_list cbsets; |
0f2ea848 BP |
282 | |
283 | guarded_list_pop_all(&flushed_cbsets, &cbsets); | |
417e7e66 | 284 | if (ovs_list_is_empty(&cbsets)) { |
0f2ea848 BP |
285 | return false; |
286 | } | |
287 | ||
288 | ovsrcu_synchronize(); | |
289 | ||
5f03c983 | 290 | LIST_FOR_EACH_POP (cbset, list_node, &cbsets) { |
0f2ea848 BP |
291 | struct ovsrcu_cb *cb; |
292 | ||
293 | for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) { | |
294 | cb->function(cb->aux); | |
295 | } | |
0f2ea848 BP |
296 | free(cbset); |
297 | } | |
298 | ||
299 | return true; | |
300 | } | |
301 | ||
302 | static void * | |
303 | ovsrcu_postpone_thread(void *arg OVS_UNUSED) | |
304 | { | |
305 | pthread_detach(pthread_self()); | |
306 | ||
307 | for (;;) { | |
308 | uint64_t seqno = seq_read(flushed_cbsets_seq); | |
309 | if (!ovsrcu_call_postponed()) { | |
310 | seq_wait(flushed_cbsets_seq, seqno); | |
311 | poll_block(); | |
312 | } | |
313 | } | |
314 | ||
315 | OVS_NOT_REACHED(); | |
316 | } | |
317 | ||
318 | static void | |
9dede5cf | 319 | ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool protected) |
0f2ea848 BP |
320 | { |
321 | struct ovsrcu_cbset *cbset = perthread->cbset; | |
322 | ||
323 | if (cbset) { | |
324 | guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX); | |
325 | perthread->cbset = NULL; | |
326 | ||
9dede5cf FL |
327 | if (protected) { |
328 | seq_change_protected(flushed_cbsets_seq); | |
329 | } else { | |
330 | seq_change(flushed_cbsets_seq); | |
331 | } | |
0f2ea848 BP |
332 | } |
333 | } | |
334 | ||
9dede5cf FL |
/* Flushes 'perthread''s pending callback batch using the unprotected
 * seq_change() path of ovsrcu_flush_cbset__(). */
static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
    const bool seq_protected = false;

    ovsrcu_flush_cbset__(perthread, seq_protected);
}
340 | ||
0f2ea848 BP |
341 | static void |
342 | ovsrcu_unregister__(struct ovsrcu_perthread *perthread) | |
343 | { | |
344 | if (perthread->cbset) { | |
345 | ovsrcu_flush_cbset(perthread); | |
346 | } | |
347 | ||
348 | ovs_mutex_lock(&ovsrcu_threads_mutex); | |
417e7e66 | 349 | ovs_list_remove(&perthread->list_node); |
0f2ea848 BP |
350 | ovs_mutex_unlock(&ovsrcu_threads_mutex); |
351 | ||
352 | ovs_mutex_destroy(&perthread->mutex); | |
353 | free(perthread); | |
354 | ||
355 | seq_change(global_seqno); | |
356 | } | |
357 | ||
/* Destructor registered for 'perthread_key' in ovsrcu_init_module().
 * 'perthread' is the 'struct ovsrcu_perthread' stored under the key; it is
 * unregistered and freed. */
static void
ovsrcu_thread_exit_cb(void *perthread)
{
    struct ovsrcu_perthread *record = perthread;

    ovsrcu_unregister__(record);
}
363 | ||
d2843eba GS |
364 | /* Cancels the callback to ovsrcu_thread_exit_cb(). |
365 | * | |
366 | * Cancelling the call to the destructor during the main thread exit | |
367 | * is needed while using pthreads-win32 library in Windows. It has been | |
368 | * observed that in pthreads-win32, a call to the destructor during | |
369 | * main thread exit causes undefined behavior. */ | |
370 | static void | |
371 | ovsrcu_cancel_thread_exit_cb(void *aux OVS_UNUSED) | |
372 | { | |
373 | pthread_setspecific(perthread_key, NULL); | |
374 | } | |
375 | ||
0f2ea848 | 376 | static void |
e10022d2 | 377 | ovsrcu_init_module(void) |
0f2ea848 BP |
378 | { |
379 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
380 | if (ovsthread_once_start(&once)) { | |
381 | global_seqno = seq_create(); | |
382 | xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb); | |
d2843eba | 383 | fatal_signal_add_hook(ovsrcu_cancel_thread_exit_cb, NULL, NULL, true); |
417e7e66 | 384 | ovs_list_init(&ovsrcu_threads); |
0f2ea848 BP |
385 | ovs_mutex_init(&ovsrcu_threads_mutex); |
386 | ||
387 | guarded_list_init(&flushed_cbsets); | |
388 | flushed_cbsets_seq = seq_create(); | |
389 | ||
390 | ovsthread_once_done(&once); | |
391 | } | |
392 | } |