]>
Commit | Line | Data |
---|---|---|
0f2ea848 | 1 | /* |
2ad4971f | 2 | * Copyright (c) 2014, 2015, 2016 Nicira, Inc. |
0f2ea848 BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #ifndef OVS_RCU_H | |
18 | #define OVS_RCU_H 1 | |
19 | ||
20 | /* Read-Copy-Update (RCU) | |
21 | * ====================== | |
22 | * | |
23 | * Introduction | |
24 | * ------------ | |
25 | * | |
26 | * Atomic pointer access makes it pretty easy to implement lock-free | |
27 | * algorithms. There is one big problem, though: when a writer updates a | |
28 | * pointer to point to a new data structure, some thread might be reading the | |
29 | * old version, and there's no convenient way to free the old version when all | |
30 | * threads are done with the old version. | |
31 | * | |
32 | * The function ovsrcu_postpone() solves that problem. The function pointer | |
33 | * passed in as its argument is called only after all threads are done with old | |
34 | * versions of data structures. The function callback frees an old version of | |
35 | * data no longer in use. This technique is called "read-copy-update", or RCU | |
36 | * for short. | |
37 | * | |
38 | * | |
39 | * Details | |
40 | * ------- | |
41 | * | |
42 | * A "quiescent state" is a time at which a thread holds no pointers to memory | |
43 | * that is managed by RCU; that is, when the thread is known not to reference | |
44 | * memory that might be an old version of some object freed via RCU. For | |
5724fca4 | 45 | * example, poll_block() includes a quiescent state. |
0f2ea848 BP |
46 | * |
47 | * The following functions manage the recognition of quiescent states: | |
48 | * | |
49 | * void ovsrcu_quiesce(void) | |
50 | * | |
51 | * Recognizes a momentary quiescent state in the current thread. | |
52 | * | |
53 | * void ovsrcu_quiesce_start(void) | |
54 | * void ovsrcu_quiesce_end(void) | |
55 | * | |
56 | * Brackets a time period during which the current thread is quiescent. | |
57 | * | |
13b6d087 DDP |
58 | * A newly created thread is initially active, not quiescent. When a process |
59 | * becomes multithreaded, the main thread becomes active, not quiescent. | |
0f2ea848 BP |
60 | * |
61 | * When a quiescent state has occurred in every thread, we say that a "grace | |
62 | * period" has occurred. Following a grace period, all of the callbacks | |
2541d759 | 63 | * postponed before the start of the grace period MAY be invoked. OVS takes |
0f2ea848 BP |
64 | * care of this automatically through the RCU mechanism: while a process still |
65 | * has only a single thread, it invokes the postponed callbacks directly from | |
66 | * ovsrcu_quiesce() and ovsrcu_quiesce_start(); after additional threads have | |
67 | * been created, it creates an extra helper thread to invoke callbacks. | |
68 | * | |
2541d759 JR |
69 | * Please note that while a postponed function call is guaranteed to happen |
70 | * after the next time all participating threads have quiesced at least once, | |
71 | * there is no guarantee that all postponed functions are called as early as | |
72 | * possible, or that the functions postponed by different threads would be | |
73 | * called in the order the registrations took place. In particular, even if | |
74 | * two threads provably postpone a function each in a specific order, the | |
75 | * postponed functions may still be called in the opposite order, depending on | |
76 | * the timing of when the threads call ovsrcu_quiesce(), how many functions | |
77 | * they postpone, and when the ovs-rcu thread happens to grab the functions to | |
78 | * be called. | |
79 | * | |
80 | * All functions postponed by a single thread are guaranteed to execute in the | |
81 | * order they were postponed, however. | |
0f2ea848 | 82 | * |
be9d0de7 BP |
83 | * Usage |
84 | * ----- | |
0f2ea848 BP |
85 | * |
86 | * Use OVSRCU_TYPE(TYPE) to declare a pointer to RCU-protected data, e.g. the | |
87 | * following declares an RCU-protected "struct flow *" named flowp: | |
88 | * | |
89 | * OVSRCU_TYPE(struct flow *) flowp; | |
90 | * | |
91 | * Use ovsrcu_get(TYPE, VAR) to read an RCU-protected pointer, e.g. to read the | |
92 | * pointer variable declared above: | |
93 | * | |
e10022d2 JR |
94 | * struct flow *flow = ovsrcu_get(struct flow *, &flowp); |
95 | * | |
96 | * If the pointer variable is currently protected against change (because | |
97 | * the current thread holds a mutex that protects it), ovsrcu_get_protected() | |
98 | * may be used instead. Only on the Alpha architecture is this likely to | |
99 | * generate different code, but it may be useful documentation. | |
100 | * | |
101 | * (With GNU C or Clang, you get a compiler error if TYPE is wrong; other | |
102 | * compilers will merrily carry along accepting the wrong type.) | |
0f2ea848 BP |
103 | * |
104 | * Use ovsrcu_set() to write an RCU-protected pointer and ovsrcu_postpone() to | |
7e5f06c3 JR |
105 | * free the previous data. ovsrcu_set_hidden() can be used on RCU-protected |
106 | * data not visible to any readers yet, but will be made visible by a later | |
107 | * ovsrcu_set(). ovsrcu_init() can be used to initialize RCU pointers when | |
108 | * no readers are yet executing. If more than one thread can write the | |
109 | * pointer, then some form of external synchronization, e.g. a mutex, is | |
110 | * needed to prevent writers from interfering with one another. For example, | |
111 | * to write the pointer variable declared above while safely freeing the old | |
112 | * value: | |
0f2ea848 BP |
113 | * |
114 | * static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; | |
115 | * | |
e10022d2 | 116 | * OVSRCU_TYPE(struct flow *) flowp; |
0f2ea848 BP |
117 | * |
118 | * void | |
119 | * change_flow(struct flow *new_flow) | |
120 | * { | |
121 | * ovs_mutex_lock(&mutex); | |
e10022d2 | 122 | * ovsrcu_postpone(free, |
0f2ea848 BP |
123 | * ovsrcu_get_protected(struct flow *, &flowp)); |
124 | * ovsrcu_set(&flowp, new_flow); | |
125 | * ovs_mutex_unlock(&mutex); | |
126 | * } | |
127 | * | |
e9217f5a DDP |
128 | * In some rare cases an object may not be addressable with a pointer, but only |
129 | * through an array index (e.g. because it's provided by another library). It | |
130 | * is still possible to have RCU semantics by using the ovsrcu_index type. | |
131 | * | |
132 | * static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; | |
133 | * | |
134 | * ovsrcu_index port_id; | |
135 | * | |
136 | * void tx() | |
137 | * { | |
138 | * int id = ovsrcu_index_get(&port_id); | |
139 | * if (id == -1) { | |
140 | * return; | |
141 | * } | |
142 | * port_tx(id); | |
143 | * } | |
144 | * | |
145 | * void delete() | |
146 | * { | |
147 | * int id; | |
148 | * | |
149 | * ovs_mutex_lock(&mutex); | |
150 | * id = ovsrcu_index_get_protected(&port_id); | |
151 | * ovsrcu_index_set(&port_id, -1); | |
152 | * ovs_mutex_unlock(&mutex); | |
153 | * | |
154 | * ovsrcu_synchronize(); | |
155 | * port_delete(id); | |
156 | * } | |
157 | * | |
0f2ea848 BP |
158 | */ |
159 | ||
160 | #include "compiler.h" | |
161 | #include "ovs-atomic.h" | |
162 | ||
0f2ea848 BP |
163 | #if __GNUC__ |
164 | #define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; } | |
b1b72f2d | 165 | #define OVSRCU_INITIALIZER(VALUE) { ATOMIC_VAR_INIT(VALUE) } |
0f2ea848 BP |
166 | #define ovsrcu_get__(TYPE, VAR, ORDER) \ |
167 | ({ \ | |
168 | TYPE value__; \ | |
2924cf52 | 169 | typeof(VAR) ovsrcu_var = (VAR); \ |
0f2ea848 | 170 | \ |
2924cf52 | 171 | atomic_read_explicit(CONST_CAST(ATOMIC(TYPE) *, &ovsrcu_var->p), \ |
0f2ea848 BP |
172 | &value__, ORDER); \ |
173 | \ | |
174 | value__; \ | |
175 | }) | |
176 | #define ovsrcu_get(TYPE, VAR) \ | |
2924cf52 | 177 | ovsrcu_get__(TYPE, VAR, memory_order_consume) |
0f2ea848 | 178 | #define ovsrcu_get_protected(TYPE, VAR) \ |
2924cf52 | 179 | ovsrcu_get__(TYPE, VAR, memory_order_relaxed) |
328fa0dc JR |
180 | |
181 | /* 'VALUE' may be an atomic operation, which must be evaluated before | |
182 | * any of the body of the atomic_store_explicit. Since the type of | |
183 | * 'VAR' is not fixed, we cannot use an inline function to get | |
184 | * function semantics for this. */ | |
185 | #define ovsrcu_set__(VAR, VALUE, ORDER) \ | |
186 | ({ \ | |
187 | typeof(VAR) ovsrcu_var = (VAR); \ | |
188 | typeof(VALUE) ovsrcu_value = (VALUE); \ | |
189 | memory_order ovsrcu_order = (ORDER); \ | |
190 | \ | |
191 | atomic_store_explicit(&ovsrcu_var->p, ovsrcu_value, ovsrcu_order); \ | |
192 | (void *) 0; \ | |
193 | }) | |
0f2ea848 | 194 | #else /* not GNU C */ |
61edaebb | 195 | struct ovsrcu_pointer { ATOMIC(void *) p; }; |
0f2ea848 | 196 | #define OVSRCU_TYPE(TYPE) struct ovsrcu_pointer |
b1b72f2d | 197 | #define OVSRCU_INITIALIZER(VALUE) { ATOMIC_VAR_INIT(VALUE) } |
0f2ea848 BP |
198 | static inline void * |
199 | ovsrcu_get__(const struct ovsrcu_pointer *pointer, memory_order order) | |
200 | { | |
201 | void *value; | |
202 | atomic_read_explicit(&CONST_CAST(struct ovsrcu_pointer *, pointer)->p, | |
203 | &value, order); | |
204 | return value; | |
205 | } | |
206 | #define ovsrcu_get(TYPE, VAR) \ | |
207 | CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_consume)) | |
208 | #define ovsrcu_get_protected(TYPE, VAR) \ | |
209 | CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_relaxed)) | |
328fa0dc JR |
210 | |
211 | static inline void ovsrcu_set__(struct ovsrcu_pointer *pointer, | |
212 | const void *value, | |
213 | memory_order order) | |
214 | { | |
215 | atomic_store_explicit(&pointer->p, CONST_CAST(void *, value), order); | |
216 | } | |
0f2ea848 BP |
217 | #endif |
218 | ||
219 | /* Writes VALUE to the RCU-protected pointer whose address is VAR. | |
220 | * | |
221 | * Writers require external synchronization (e.g. a mutex) if more than one | |
222 | * thread can write the pointer. See "Usage" above for an example. */ | |
223 | #define ovsrcu_set(VAR, VALUE) \ | |
328fa0dc | 224 | ovsrcu_set__(VAR, VALUE, memory_order_release) |
0f2ea848 | 225 | |
e10022d2 JR |
226 | /* This can be used for initializing RCU pointers before any readers can |
227 | * see them. A later ovsrcu_set() needs to make the bigger structure this | |
228 | * is part of visible to the readers. */ | |
7e5f06c3 | 229 | #define ovsrcu_set_hidden(VAR, VALUE) \ |
328fa0dc | 230 | ovsrcu_set__(VAR, VALUE, memory_order_relaxed) |
e10022d2 | 231 | |
7e5f06c3 JR |
232 | /* This can be used for initializing RCU pointers before any readers are |
233 | * executing. */ | |
234 | #define ovsrcu_init(VAR, VALUE) atomic_init(&(VAR)->p, VALUE) | |
235 | ||
0f2ea848 | 236 | /* Calls FUNCTION passing ARG as its pointer-type argument following the next |
be9d0de7 | 237 | * grace period. See "Usage" above for an example. */ |
0f2ea848 BP |
238 | void ovsrcu_postpone__(void (*function)(void *aux), void *aux); |
239 | #define ovsrcu_postpone(FUNCTION, ARG) \ | |
2ad4971f BP |
240 | (/* Verify that ARG is appropriate for FUNCTION. */ \ |
241 | (void) sizeof((FUNCTION)(ARG), 1), \ | |
242 | /* Verify that ARG is a pointer type. */ \ | |
0f2ea848 BP |
243 | (void) sizeof(*(ARG)), \ |
244 | ovsrcu_postpone__((void (*)(void *))(FUNCTION), ARG)) | |
245 | ||
e9217f5a DDP |
246 | /* An array index protected by RCU semantics. This is an easier alternative to |
247 | * an RCU-protected pointer to a malloc'd int. */ | |
248 | typedef struct { atomic_int v; } ovsrcu_index; | |
249 | ||
250 | static inline int ovsrcu_index_get__(const ovsrcu_index *i, memory_order order) | |
251 | { | |
252 | int ret; | |
253 | atomic_read_explicit(CONST_CAST(atomic_int *, &i->v), &ret, order); | |
254 | return ret; | |
255 | } | |
256 | ||
257 | /* Returns the index contained in 'i'. The returned value can be used until | |
258 | * the next grace period. */ | |
259 | static inline int ovsrcu_index_get(const ovsrcu_index *i) | |
260 | { | |
261 | return ovsrcu_index_get__(i, memory_order_consume); | |
262 | } | |
263 | ||
264 | /* Returns the index contained in 'i'. This is an alternative to | |
265 | * ovsrcu_index_get() that can be used when there's no possible concurrent | |
266 | * writer. */ | |
267 | static inline int ovsrcu_index_get_protected(const ovsrcu_index *i) | |
268 | { | |
269 | return ovsrcu_index_get__(i, memory_order_relaxed); | |
270 | } | |
271 | ||
272 | static inline void ovsrcu_index_set__(ovsrcu_index *i, int value, | |
273 | memory_order order) | |
274 | { | |
275 | atomic_store_explicit(&i->v, value, order); | |
276 | } | |
277 | ||
278 | /* Writes the index 'value' in 'i'. The previous value of 'i' may still be | |
279 | * used by readers until the next grace period. */ | |
280 | static inline void ovsrcu_index_set(ovsrcu_index *i, int value) | |
281 | { | |
282 | ovsrcu_index_set__(i, value, memory_order_release); | |
283 | } | |
284 | ||
285 | /* Writes the index 'value' in 'i'. This is an alternative to | |
286 | * ovsrcu_index_set() that can be used when there's no possible concurrent | |
287 | * reader. */ | |
288 | static inline void ovsrcu_index_set_hidden(ovsrcu_index *i, int value) | |
289 | { | |
290 | ovsrcu_index_set__(i, value, memory_order_relaxed); | |
291 | } | |
292 | ||
293 | /* Initializes 'i' with 'value'. This is safe to call as long as there are no | |
294 | * concurrent readers. */ | |
295 | static inline void ovsrcu_index_init(ovsrcu_index *i, int value) | |
296 | { | |
297 | atomic_init(&i->v, value); | |
298 | } | |
299 | ||
0f2ea848 BP |
300 | /* Quiescent states. */ |
301 | void ovsrcu_quiesce_start(void); | |
302 | void ovsrcu_quiesce_end(void); | |
303 | void ovsrcu_quiesce(void); | |
9dede5cf | 304 | int ovsrcu_try_quiesce(void); |
3308c696 | 305 | bool ovsrcu_is_quiescent(void); |
0f2ea848 | 306 | |
0426e67c JR |
307 | /* Synchronization. Waits for all non-quiescent threads to quiesce at least |
308 | * once. This can block for a relatively long time. */ | |
309 | void ovsrcu_synchronize(void); | |
310 | ||
0f2ea848 | 311 | #endif /* ovs-rcu.h */ |