]> git.proxmox.com Git - mirror_ovs.git/blame - lib/ovs-rcu.h
ovsdb-idl: Fix iteration over tracked rows with no actual data.
[mirror_ovs.git] / lib / ovs-rcu.h
CommitLineData
0f2ea848 1/*
2ad4971f 2 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
0f2ea848
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef OVS_RCU_H
18#define OVS_RCU_H 1
19
20/* Read-Copy-Update (RCU)
21 * ======================
22 *
23 * Introduction
24 * ------------
25 *
26 * Atomic pointer access makes it pretty easy to implement lock-free
27 * algorithms. There is one big problem, though: when a writer updates a
28 * pointer to point to a new data structure, some thread might be reading the
29 * old version, and there's no convenient way to free the old version when all
30 * threads are done with the old version.
31 *
32 * The function ovsrcu_postpone() solves that problem. The function pointer
33 * passed in as its argument is called only after all threads are done with old
34 * versions of data structures. The function callback frees an old version of
35 * data no longer in use. This technique is called "read-copy-update", or RCU
36 * for short.
37 *
38 *
39 * Details
40 * -------
41 *
42 * A "quiescent state" is a time at which a thread holds no pointers to memory
43 * that is managed by RCU; that is, when the thread is known not to reference
44 * memory that might be an old version of some object freed via RCU. For
5724fca4 45 * example, poll_block() includes a quiescent state.
0f2ea848
BP
46 *
47 * The following functions manage the recognition of quiescent states:
48 *
49 * void ovsrcu_quiesce(void)
50 *
51 * Recognizes a momentary quiescent state in the current thread.
52 *
53 * void ovsrcu_quiesce_start(void)
54 * void ovsrcu_quiesce_end(void)
55 *
56 * Brackets a time period during which the current thread is quiescent.
57 *
13b6d087
DDP
58 * A newly created thread is initially active, not quiescent. When a process
59 * becomes multithreaded, the main thread becomes active, not quiescent.
0f2ea848
BP
60 *
61 * When a quiescent state has occurred in every thread, we say that a "grace
62 * period" has occurred. Following a grace period, all of the callbacks
2541d759 63 * postponed before the start of the grace period MAY be invoked. OVS takes
0f2ea848
BP
64 * care of this automatically through the RCU mechanism: while a process still
65 * has only a single thread, it invokes the postponed callbacks directly from
66 * ovsrcu_quiesce() and ovsrcu_quiesce_start(); after additional threads have
67 * been created, it creates an extra helper thread to invoke callbacks.
68 *
2541d759
JR
69 * Please note that while a postponed function call is guaranteed to happen
70 * after the next time all participating threads have quiesced at least once,
71 * there is no guarantee that all postponed functions are called as early as
72 * possible, or that the functions postponed by different threads would be
73 * called in the order the registrations took place. In particular, even if
74 * two threads provably postpone a function each in a specific order, the
75 * postponed functions may still be called in the opposite order, depending on
76 * the timing of when the threads call ovsrcu_quiesce(), how many functions
77 * they postpone, and when the ovs-rcu thread happens to grab the functions to
78 * be called.
79 *
80 * All functions postponed by a single thread are guaranteed to execute in the
81 * order they were postponed, however.
0f2ea848 82 *
be9d0de7
BP
83 * Usage
84 * -----
0f2ea848
BP
85 *
86 * Use OVSRCU_TYPE(TYPE) to declare a pointer to RCU-protected data, e.g. the
87 * following declares an RCU-protected "struct flow *" named flowp:
88 *
89 * OVSRCU_TYPE(struct flow *) flowp;
90 *
91 * Use ovsrcu_get(TYPE, VAR) to read an RCU-protected pointer, e.g. to read the
92 * pointer variable declared above:
93 *
e10022d2
JR
94 * struct flow *flow = ovsrcu_get(struct flow *, &flowp);
95 *
96 * If the pointer variable is currently protected against change (because
97 * the current thread holds a mutex that protects it), ovsrcu_get_protected()
98 * may be used instead. Only on the Alpha architecture is this likely to
99 * generate different code, but it may be useful documentation.
100 *
101 * (With GNU C or Clang, you get a compiler error if TYPE is wrong; other
102 * compilers will merrily carry along accepting the wrong type.)
0f2ea848
BP
103 *
104 * Use ovsrcu_set() to write an RCU-protected pointer and ovsrcu_postpone() to
7e5f06c3
JR
105 * free the previous data. ovsrcu_set_hidden() can be used on RCU protected
106 * data not visible to any readers yet, but will be made visible by a later
107 * ovsrcu_set(). ovsrcu_init() can be used to initialize RCU pointers when
108 * no readers are yet executing. If more than one thread can write the
109 * pointer, then some form of external synchronization, e.g. a mutex, is
110 * needed to prevent writers from interfering with one another. For example,
111 * to write the pointer variable declared above while safely freeing the old
112 * value:
0f2ea848
BP
113 *
114 * static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
115 *
e10022d2 116 * OVSRCU_TYPE(struct flow *) flowp;
0f2ea848
BP
117 *
118 * void
119 * change_flow(struct flow *new_flow)
120 * {
121 * ovs_mutex_lock(&mutex);
e10022d2 122 * ovsrcu_postpone(free,
0f2ea848
BP
123 * ovsrcu_get_protected(struct flow *, &flowp));
124 * ovsrcu_set(&flowp, new_flow);
125 * ovs_mutex_unlock(&mutex);
126 * }
127 *
e9217f5a
DDP
128 * In some rare cases an object may not be addressable with a pointer, but only
129 * through an array index (e.g. because it's provided by another library). It
130 * is still possible to have RCU semantics by using the ovsrcu_index type.
131 *
132 * static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
133 *
134 * ovsrcu_index port_id;
135 *
136 * void tx()
137 * {
138 * int id = ovsrcu_index_get(&port_id);
139 * if (id == -1) {
140 * return;
141 * }
142 * port_tx(id);
143 * }
144 *
145 * void delete()
146 * {
147 * int id;
148 *
149 * ovs_mutex_lock(&mutex);
150 * id = ovsrcu_index_get_protected(&port_id);
151 * ovsrcu_index_set(&port_id, -1);
152 * ovs_mutex_unlock(&mutex);
153 *
154 * ovsrcu_synchronize();
155 * port_delete(id);
156 * }
157 *
0f2ea848
BP
158 */
159
160#include "compiler.h"
161#include "ovs-atomic.h"
162
0f2ea848
BP
163#if __GNUC__
/* GNU C variant: an RCU-protected pointer is an anonymous struct wrapping a
 * single atomic member p of the requested TYPE. */
164#define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; }
/* Brace initializer for an OVSRCU_TYPE() variable that starts out holding
 * VALUE. */
b1b72f2d 165#define OVSRCU_INITIALIZER(VALUE) { ATOMIC_VAR_INIT(VALUE) }
0f2ea848
BP
/* Atomically reads the pointer stored in *VAR using memory order ORDER and
 * yields it as a TYPE.
 *
 * VAR is evaluated exactly once: it is copied into the local ovsrcu_var
 * before the read.  The body is a statement expression and relies on typeof.
 *
 * NOTE(review): CONST_CAST presumably removes const from the address of the
 * p member so that VAR may point to a const-qualified object; confirm against
 * its definition, which is not shown in this file. */
166#define ovsrcu_get__(TYPE, VAR, ORDER) \
167 ({ \
168 TYPE value__; \
2924cf52 169 typeof(VAR) ovsrcu_var = (VAR); \
0f2ea848 170 \
2924cf52 171 atomic_read_explicit(CONST_CAST(ATOMIC(TYPE) *, &ovsrcu_var->p), \
0f2ea848
BP
172 &value__, ORDER); \
173 \
174 value__; \
175 })
/* Reader-side access: ovsrcu_get__() with memory_order_consume. */
176#define ovsrcu_get(TYPE, VAR) \
2924cf52 177 ovsrcu_get__(TYPE, VAR, memory_order_consume)
/* Access while the pointer is protected against change: ovsrcu_get__() with
 * memory_order_relaxed. */
0f2ea848 178#define ovsrcu_get_protected(TYPE, VAR) \
2924cf52 179 ovsrcu_get__(TYPE, VAR, memory_order_relaxed)
328fa0dc
JR
180
181/* 'VALUE' may be an atomic operation, which must be evaluated before
182 * any of the body of the atomic_store_explicit. Since the type of
183 * 'VAR' is not fixed, we cannot use an inline function to get
184 * function semantics for this. */
/* Stores VALUE into the RCU pointer at *VAR using memory order ORDER.
 *
 * VAR, VALUE and ORDER are each evaluated exactly once, into the locals
 * ovsrcu_var, ovsrcu_value and ovsrcu_order, before atomic_store_explicit()
 * is expanded.  The statement expression yields a null void pointer,
 * (void *) 0, as its value. */
185#define ovsrcu_set__(VAR, VALUE, ORDER) \
186 ({ \
187 typeof(VAR) ovsrcu_var = (VAR); \
188 typeof(VALUE) ovsrcu_value = (VALUE); \
189 memory_order ovsrcu_order = (ORDER); \
190 \
191 atomic_store_explicit(&ovsrcu_var->p, ovsrcu_value, ovsrcu_order); \
192 (void *) 0; \
193 })
0f2ea848 194#else /* not GNU C */
/* Fallback for compilers other than GNU C: every RCU-protected pointer is
 * stored as an atomic void pointer, whatever TYPE the user asked for. */
61edaebb 195struct ovsrcu_pointer { ATOMIC(void *) p; };
/* TYPE is ignored here, so this variant cannot diagnose a mismatched TYPE. */
0f2ea848 196#define OVSRCU_TYPE(TYPE) struct ovsrcu_pointer
/* Brace initializer for an OVSRCU_TYPE() variable that starts out holding
 * VALUE. */
b1b72f2d 197#define OVSRCU_INITIALIZER(VALUE) { ATOMIC_VAR_INIT(VALUE) }
0f2ea848
BP
/* Atomically reads and returns the pointer stored in the p member of
 * *pointer, using memory order "order".
 *
 * NOTE(review): CONST_CAST presumably removes the const qualifier from
 * "pointer" so that the atomic read can be applied to it; confirm against its
 * definition, which is not shown in this file. */
198static inline void *
199ovsrcu_get__(const struct ovsrcu_pointer *pointer, memory_order order)
200{
201 void *value;
202 atomic_read_explicit(&CONST_CAST(struct ovsrcu_pointer *, pointer)->p,
203 &value, order);
204 return value;
205}
/* Reader-side access: memory_order_consume read, with the void pointer
 * result converted to TYPE through CONST_CAST. */
206#define ovsrcu_get(TYPE, VAR) \
207 CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_consume))
/* Access while the pointer is protected against change: memory_order_relaxed
 * read, with the void pointer result converted to TYPE through CONST_CAST. */
208#define ovsrcu_get_protected(TYPE, VAR) \
209 CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_relaxed))
328fa0dc
JR
210
/* Non-GNU C counterpart of the ovsrcu_set__() macro: atomically stores
 * "value" into the p member of *pointer using memory order "order".
 *
 * "value" is accepted as a pointer to const and passed through CONST_CAST to
 * obtain the plain void pointer that is stored. */
211static inline void ovsrcu_set__(struct ovsrcu_pointer *pointer,
212 const void *value,
213 memory_order order)
214{
215 atomic_store_explicit(&pointer->p, CONST_CAST(void *, value), order);
216}
0f2ea848
BP
217#endif
218
219/* Writes VALUE to the RCU-protected pointer whose address is VAR.
220 *
221 * Users require external synchronization (e.g. a mutex). See "Usage" above
222 * for an example. */
/* Expands to ovsrcu_set__() with memory_order_release. */
223#define ovsrcu_set(VAR, VALUE) \
328fa0dc 224 ovsrcu_set__(VAR, VALUE, memory_order_release)
0f2ea848 225
e10022d2
JR
226/* This can be used for initializing RCU pointers before any readers can
227 * see them. A later ovsrcu_set() needs to make the bigger structure this
228 * is part of visible to the readers. */
/* Expands to ovsrcu_set__() with memory_order_relaxed, i.e. the store itself
 * requests no ordering. */
7e5f06c3 229#define ovsrcu_set_hidden(VAR, VALUE) \
328fa0dc 230 ovsrcu_set__(VAR, VALUE, memory_order_relaxed)
e10022d2 231
7e5f06c3
JR
232/* This can be used for initializing RCU pointers before any readers are
233 * executing. */
/* Applies atomic_init() directly to the p member of *VAR instead of going
 * through ovsrcu_set__(), so no memory order argument is involved. */
234#define ovsrcu_init(VAR, VALUE) atomic_init(&(VAR)->p, VALUE)
235
0f2ea848 236/* Calls FUNCTION passing ARG as its pointer-type argument following the next
be9d0de7 237 * grace period. See "Usage" above for an example. */
0f2ea848
BP
/* Worker behind ovsrcu_postpone().  It receives the callback already
 * converted to the generic void (*)(void *) signature, together with the
 * argument "aux" to pass to it.  Declared here only; the definition is not in
 * this header. */
238void ovsrcu_postpone__(void (*function)(void *aux), void *aux);
/* Type-checked front end for ovsrcu_postpone__().
 *
 * The two sizeof expressions exist only so that the compiler checks their
 * operands; sizeof does not evaluate its operand, so FUNCTION is not called
 * there, and each result is discarded with a (void) cast.  The last operand
 * of the comma expression casts FUNCTION to void (*)(void *) and hands it to
 * ovsrcu_postpone__() together with ARG. */
239#define ovsrcu_postpone(FUNCTION, ARG) \
2ad4971f
BP
240 (/* Verify that ARG is appropriate for FUNCTION. */ \
241 (void) sizeof((FUNCTION)(ARG), 1), \
242 /* Verify that ARG is a pointer type. */ \
0f2ea848
BP
243 (void) sizeof(*(ARG)), \
244 ovsrcu_postpone__((void (*)(void *))(FUNCTION), ARG))
245
e9217f5a
DDP
246/* An array index protected by RCU semantics. This is an easier alternative to
247 * an RCU protected pointer to a malloc'd int. */
/* The index is held in a single atomic_int member, v. */
248typedef struct { atomic_int v; } ovsrcu_index;
249
/* Atomically reads the v member of *i using memory order "order" and returns
 * it.  Shared helper for ovsrcu_index_get() and ovsrcu_index_get_protected().
 *
 * NOTE(review): CONST_CAST presumably removes the const qualifier that the
 * parameter type puts on the address of v; confirm against its definition,
 * which is not shown in this file. */
250static inline int ovsrcu_index_get__(const ovsrcu_index *i, memory_order order)
251{
252 int ret;
253 atomic_read_explicit(CONST_CAST(atomic_int *, &i->v), &ret, order);
254 return ret;
255}
256
257/* Returns the index contained in 'i'. The returned value can be used until
258 * the next grace period. */
259static inline int ovsrcu_index_get(const ovsrcu_index *i)
260{
/* Consume ordering, the same ordering ovsrcu_get() requests for pointers. */
261 return ovsrcu_index_get__(i, memory_order_consume);
262}
263
264/* Returns the index contained in 'i'. This is an alternative to
265 * ovsrcu_index_get() that can be used when there's no possible concurrent
266 * writer. */
267static inline int ovsrcu_index_get_protected(const ovsrcu_index *i)
268{
/* Relaxed ordering, the same ordering ovsrcu_get_protected() requests for
 * pointers. */
269 return ovsrcu_index_get__(i, memory_order_relaxed);
270}
271
/* Atomically stores "value" into the v member of *i using memory order
 * "order".  Shared helper for ovsrcu_index_set() and
 * ovsrcu_index_set_hidden(). */
272static inline void ovsrcu_index_set__(ovsrcu_index *i, int value,
273 memory_order order)
274{
275 atomic_store_explicit(&i->v, value, order);
276}
277
278/* Writes the index 'value' in 'i'. The previous value of 'i' may still be
279 * used by readers until the next grace period. */
280static inline void ovsrcu_index_set(ovsrcu_index *i, int value)
281{
/* Release ordering, the same ordering ovsrcu_set() requests for pointers. */
282 ovsrcu_index_set__(i, value, memory_order_release);
283}
284
285/* Writes the index 'value' in 'i'. This is an alternative to
286 * ovsrcu_index_set() that can be used when there's no possible concurrent
287 * reader. */
288static inline void ovsrcu_index_set_hidden(ovsrcu_index *i, int value)
289{
/* Relaxed ordering, the same ordering ovsrcu_set_hidden() requests for
 * pointers. */
290 ovsrcu_index_set__(i, value, memory_order_relaxed);
291}
292
293/* Initializes 'i' with 'value'. This is safe to call as long as there are no
294 * concurrent readers. */
295static inline void ovsrcu_index_init(ovsrcu_index *i, int value)
296{
/* atomic_init() rather than an ordered store; no memory order is given. */
297 atomic_init(&i->v, value);
298}
299
0f2ea848
BP
300/* Quiescent states. */
/* Per the "Details" section of the header comment in this file:
 * ovsrcu_quiesce_start() and ovsrcu_quiesce_end() bracket a time period
 * during which the current thread is quiescent, and ovsrcu_quiesce()
 * recognizes a momentary quiescent state in the current thread. */
301void ovsrcu_quiesce_start(void);
302void ovsrcu_quiesce_end(void);
303void ovsrcu_quiesce(void);
/* NOTE(review): the meaning of the int result is not documented in this
 * header; presumably zero means a quiescent state was recorded.  Confirm
 * against the definition. */
9dede5cf 304int ovsrcu_try_quiesce(void);
/* NOTE(review): presumably reports whether the calling thread is currently
 * quiescent; not documented in this header, confirm against the definition. */
3308c696 305bool ovsrcu_is_quiescent(void);
0f2ea848 306
0426e67c
JR
307/* Synchronization. Waits for all non-quiescent threads to quiesce at least
308 * once. This can block for a relatively long time. */
309void ovsrcu_synchronize(void);
310
9a3cf0ac
BP
/* NOTE(review): undocumented in this header; presumably shuts down the RCU
 * machinery at process exit.  Confirm against the definition before relying
 * on any particular behavior. */
311void ovsrcu_exit(void);
312
0f2ea848 313#endif /* ovs-rcu.h */