/*
 * Copyright (c) 2017-19  David Lamparter, for NetDEF, Inc.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* implementation notes: this is an epoch-based RCU implementation.  rcu_seq
 * (global variable) counts the current epoch.  Threads hold a specific epoch
 * in rcu_read_lock().  This is the oldest epoch a thread might be accessing
 * data from.
 *
 * The rcu_seq global is only pushed forward on rcu_read_lock() and
 * rcu_read_unlock() calls.  This makes things a tad more efficient since
 * those are the only places it matters:
 * - on rcu_read_lock, we don't want to hold an old epoch pointlessly
 * - on rcu_read_unlock, we want to make sure we're not stuck on an old epoch
 *   when heading into a long idle period where no thread holds RCU
 *
 * rcu_thread structures themselves are RCU-free'd.
 *
 * rcu_head structures are the most iffy;  normally for an ATOMLIST we would
 * need to make sure we use rcu_free or pthread_rwlock to deallocate old items
 * to prevent ABA or use-after-free problems.  However, our ATOMLIST code
 * guarantees that if the list remains non-empty in all cases, we only need
 * the "last" pointer to do an "add_tail()", i.e. we can't run into ABA/UAF
 * issues - but we do need to keep at least 1 item on the list.
 *
 * (Search the atomlist code for all uses of "last")
 */
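
/* a minimal reader/writer sketch (hypothetical caller code; "foop",
 * "newfoo" and MTYPE_FOO are placeholders, rcu_free() is the public
 * wrapper declared in frrcu.h):
 *
 *	// reader
 *	rcu_read_lock();
 *	struct foo *f = atomic_load_explicit(foop, memory_order_consume);
 *	... use f ...
 *	rcu_read_unlock();
 *
 *	// writer: swap the pointer out, then defer freeing the old object
 *	rcu_read_lock();
 *	struct foo *old = atomic_exchange_explicit(foop, newfoo,
 *						   memory_order_acq_rel);
 *	rcu_free(MTYPE_FOO, old, rcu_head);
 *	rcu_read_unlock();
 */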

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <pthread.h>
#ifdef HAVE_PTHREAD_NP_H
#include <pthread_np.h>
#endif
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <assert.h>

#include "frrcu.h"
#include "seqlock.h"
#include "atomlist.h"

DEFINE_MTYPE_STATIC(LIB, RCU_THREAD, "RCU thread")

DECLARE_ATOMLIST(rcu_heads, struct rcu_head, head)

PREDECL_ATOMLIST(rcu_threads)
struct rcu_thread {
	struct rcu_threads_item head;

	struct rcu_head rcu_head;

	struct seqlock rcu;

	/* only accessed by thread itself, not atomic */
	unsigned depth;
};
DECLARE_ATOMLIST(rcu_threads, struct rcu_thread, head)

static const struct rcu_action rcua_next = { .type = RCUA_NEXT };
static const struct rcu_action rcua_end = { .type = RCUA_END };
static const struct rcu_action rcua_close = { .type = RCUA_CLOSE };

struct rcu_next {
	struct rcu_head head_free;
	struct rcu_head head_next;
};
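
/* see rcu_bump() below:  head_next carries the RCUA_NEXT epoch marker,
 * while head_free queues this struct rcu_next itself to be freed one epoch
 * later, keeping the rcu_heads list from ever running empty.
 */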

#define rcu_free_internal(mtype, ptr, field) \
	do { \
		typeof(ptr) _ptr = (ptr); \
		struct rcu_head *_rcu_head = &_ptr->field; \
		static const struct rcu_action _rcu_action = { \
			.type = RCUA_FREE, \
			.u.free = { \
				.mt = mtype, \
				.offset = offsetof(typeof(*_ptr), field), \
			}, \
		}; \
		_rcu_head->action = &_rcu_action; \
		rcu_heads_add_tail(&rcu_heads, _rcu_head); \
	} while (0)
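
/* in contrast to the public rcu_free() (cf. frrcu.h, which goes through
 * rcu_enqueue()), this internal variant appends straight to rcu_heads and
 * does not record the epoch in rcu_dirty - cleanup may thus be deferred
 * until something else triggers a bump, which is acceptable for the
 * internal call sites below.
 */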

/* primary global RCU position */
static struct seqlock rcu_seq;
/* this is set to rcu_seq whenever something is added on the RCU queue.
 * rcu_read_lock() and rcu_read_unlock() will then bump rcu_seq up one step.
 */
static _Atomic seqlock_val_t rcu_dirty;

static struct rcu_threads_head rcu_threads;
static struct rcu_heads_head rcu_heads;

/* main thread & RCU sweeper have pre-setup rcu_thread structures.  The
 * reasons are different:
 *
 * - rcu_thread_main is there because the main thread isn't started like
 *   other threads, it's implicitly created when the program is started.  So
 *   rcu_thread_main matches up implicitly.
 *
 * - rcu_thread_rcu isn't actually put on the rcu_threads list (makes no
 *   sense really), it only exists so we can call RCU-using functions from
 *   the RCU thread without special handling in rcu_read_lock/unlock.
 */
static struct rcu_thread rcu_thread_main;
static struct rcu_thread rcu_thread_rcu;

static pthread_t rcu_pthread;
static pthread_key_t rcu_thread_key;
static bool rcu_active;

static void rcu_start(void);
static void rcu_bump(void);

/*
 * preinitialization for main thread
 */
static void rcu_thread_end(void *rcu_thread);

static void rcu_preinit(void) __attribute__((constructor));
static void rcu_preinit(void)
{
	struct rcu_thread *rt;

	rt = &rcu_thread_main;
	rt->depth = 1;
	seqlock_init(&rt->rcu);
	seqlock_acquire_val(&rt->rcu, SEQLOCK_STARTVAL);

	pthread_key_create(&rcu_thread_key, rcu_thread_end);
	pthread_setspecific(rcu_thread_key, rt);

	rcu_threads_add_tail(&rcu_threads, rt);

	/* RCU sweeper's rcu_thread is a dummy, NOT added to rcu_threads */
	rt = &rcu_thread_rcu;
	rt->depth = 1;

	seqlock_init(&rcu_seq);
	seqlock_acquire_val(&rcu_seq, SEQLOCK_STARTVAL);
}

static struct rcu_thread *rcu_self(void)
{
	return (struct rcu_thread *)pthread_getspecific(rcu_thread_key);
}

/*
 * thread management (for the non-main thread)
 */
struct rcu_thread *rcu_thread_prepare(void)
{
	struct rcu_thread *rt, *cur;

	rcu_assert_read_locked();

	if (!rcu_active)
		rcu_start();

	cur = rcu_self();
	assert(cur->depth);

	/* new thread always starts with rcu_read_lock held at depth 1, and
	 * holding the same epoch as the parent (this makes it possible to
	 * use RCU for things passed into the thread through its arg)
	 */
	rt = XCALLOC(MTYPE_RCU_THREAD, sizeof(*rt));
	rt->depth = 1;

	seqlock_init(&rt->rcu);
	seqlock_acquire(&rt->rcu, &cur->rcu);

	rcu_threads_add_tail(&rcu_threads, rt);

	return rt;
}

void rcu_thread_start(struct rcu_thread *rt)
{
	pthread_setspecific(rcu_thread_key, rt);
}
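
/* intended call pattern, as a sketch ("thread_fn" and "tid" are
 * placeholders; the real callers live in FRR's pthread wrapper):
 *
 *	// parent thread, with rcu_read_lock() held:
 *	struct rcu_thread *rt = rcu_thread_prepare();
 *	pthread_create(&tid, NULL, thread_fn, rt);
 *
 *	// first thing inside thread_fn:
 *	rcu_thread_start(rt);	// now read-locked at depth 1, parent's epoch
 */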

void rcu_thread_unprepare(struct rcu_thread *rt)
{
	if (rt == &rcu_thread_rcu)
		return;

	rt->depth = 1;
	seqlock_acquire(&rt->rcu, &rcu_seq);

	rcu_bump();
	if (rt != &rcu_thread_main)
		/* this free() happens after seqlock_release() below */
		rcu_free_internal(MTYPE_RCU_THREAD, rt, rcu_head);

	rcu_threads_del(&rcu_threads, rt);
	seqlock_release(&rt->rcu);
}
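
/* registered as the rcu_thread_key destructor in rcu_preinit();  pthread
 * runs it automatically on thread exit, so threads that die without
 * explicit cleanup still get unhooked from rcu_threads.
 */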
static void rcu_thread_end(void *rtvoid)
{
	struct rcu_thread *rt = rtvoid;
	rcu_thread_unprepare(rt);
}

/*
 * main RCU control aspects
 */

static void rcu_bump(void)
{
	struct rcu_next *rn;

	rn = XMALLOC(MTYPE_RCU_THREAD, sizeof(*rn));

	/* note: each RCUA_NEXT item corresponds to exactly one seqno bump.
	 * This means we don't need to communicate which seqno is which
	 * RCUA_NEXT, since we really don't care.
	 */

	/*
	 * Important race condition: while rcu_heads_add_tail is executing,
	 * there is an intermediate point where the rcu_heads "last" pointer
	 * already points to rn->head_next, but rn->head_next isn't added to
	 * the list yet.  That means any other "add_tail" calls append to this
	 * item, which isn't fully on the list yet.  Freeze this thread at
	 * that point and look at another thread doing a rcu_bump.  It adds
	 * these two items and then does a seqlock_bump.  But the rcu_heads
	 * list is still "interrupted" and there's no RCUA_NEXT on the list
	 * yet (from either the frozen thread or the second thread).  So
	 * rcu_main() might actually hit the end of the list at the
	 * "interrupt".
	 *
	 * This situation is prevented by requiring that rcu_read_lock is held
	 * for any calls to rcu_bump, since if we're holding the current RCU
	 * epoch, that means rcu_main can't be chewing on rcu_heads and hit
	 * that interruption point.  Only once the thread has continued to
	 * rcu_read_unlock() - and therefore completed the add_tail - does
	 * the RCU sweeper gobble up the epoch, and it can then be sure to
	 * find at least the RCUA_NEXT and RCUA_FREE items on rcu_heads.
	 */
	rn->head_next.action = &rcua_next;
	rcu_heads_add_tail(&rcu_heads, &rn->head_next);

	/* free rn that we allocated above.
	 *
	 * This is INTENTIONALLY not built into the RCUA_NEXT action.  This
	 * ensures that after the action above is popped off the queue, there
	 * is still at least 1 item on the RCU queue.  This means we never
	 * delete the last item, which is extremely important since it keeps
	 * the atomlist ->last pointer alive and well.
	 *
	 * If we were to "run dry" on the RCU queue, add_tail may run into the
	 * "last item is being deleted - start over" case, and then we may end
	 * up accessing old RCU queue items that are already free'd.
	 */
	rcu_free_internal(MTYPE_RCU_THREAD, rn, head_free);

	/* Only allow the RCU sweeper to run after these 2 items are queued.
	 *
	 * If another thread enqueues some RCU action in the intermediate
	 * window here, nothing bad happens - the queued action is associated
	 * with a larger seq# than strictly necessary.  Thus, it might get
	 * executed a bit later, but that's not a problem.
	 *
	 * If another thread acquires the read lock in this window, it holds
	 * the previous epoch, but its RCU queue actions will be in the next
	 * epoch.  This isn't a problem either, just a tad inefficient.
	 */
	seqlock_bump(&rcu_seq);
}
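
/* one rcu_bump() thus appends exactly two items, in this order:
 *
 *	... -> [RCUA_NEXT: rn->head_next] -> [RCUA_FREE rn: rn->head_free]
 *
 * rcu_main() stops popping at the RCUA_NEXT marker, so rn's own RCUA_FREE
 * stays behind as the guaranteed non-empty tail until the following epoch.
 */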

static void rcu_bump_maybe(void)
{
	seqlock_val_t dirty;

	dirty = atomic_load_explicit(&rcu_dirty, memory_order_relaxed);
	/* no problem if we race here and multiple threads bump rcu_seq;
	 * bumping too much causes no issues while not bumping enough will
	 * result in delayed cleanup
	 */
	if (dirty == seqlock_cur(&rcu_seq))
		rcu_bump();
}
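
/* reading the check above:  rcu_dirty == rcu_seq means "something was
 * enqueued during the current epoch and no bump has happened since", which
 * is the only case where a bump is useful.  If rcu_seq already moved past
 * the recorded value, the queued items sit behind an RCUA_NEXT marker and
 * the sweeper will reach them without further help.
 */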

void rcu_read_lock(void)
{
	struct rcu_thread *rt = rcu_self();

	assert(rt);
	if (rt->depth++ > 0)
		return;

	seqlock_acquire(&rt->rcu, &rcu_seq);
	/* need to hold RCU for bump ... */
	rcu_bump_maybe();
	/* ... but no point in holding the old epoch if we just bumped */
	seqlock_acquire(&rt->rcu, &rcu_seq);
}

void rcu_read_unlock(void)
{
	struct rcu_thread *rt = rcu_self();

	assert(rt && rt->depth);
	if (--rt->depth > 0)
		return;
	rcu_bump_maybe();
	seqlock_release(&rt->rcu);
}
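
/* read-side sections nest; only the outermost pair touches the seqlock,
 * e.g. for a hypothetical caller:
 *
 *	rcu_read_lock();	// depth 0 -> 1, epoch acquired
 *	rcu_read_lock();	// depth 1 -> 2, no seqlock ops
 *	rcu_read_unlock();	// depth 2 -> 1, epoch still held
 *	rcu_read_unlock();	// depth 1 -> 0, epoch released
 */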

void rcu_assert_read_locked(void)
{
	struct rcu_thread *rt = rcu_self();
	assert(rt && rt->depth && seqlock_held(&rt->rcu));
}

void rcu_assert_read_unlocked(void)
{
	struct rcu_thread *rt = rcu_self();
	assert(rt && !rt->depth && !seqlock_held(&rt->rcu));
}

/*
 * RCU resource-release thread
 */

static void *rcu_main(void *arg);

static void rcu_start(void)
{
	/* ensure we never handle signals on the RCU thread by blocking
	 * everything here (new thread inherits signal mask)
	 */
	sigset_t oldsigs, blocksigs;

	sigfillset(&blocksigs);
	pthread_sigmask(SIG_BLOCK, &blocksigs, &oldsigs);

	rcu_active = true;

	assert(!pthread_create(&rcu_pthread, NULL, rcu_main, NULL));

	pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);

#ifdef HAVE_PTHREAD_SETNAME_NP
# ifdef GNU_LINUX
	pthread_setname_np(rcu_pthread, "RCU sweeper");
# elif defined(__NetBSD__)
	pthread_setname_np(rcu_pthread, "RCU sweeper", NULL);
# endif
#elif defined(HAVE_PTHREAD_SET_NAME_NP)
	pthread_set_name_np(rcu_pthread, "RCU sweeper");
#endif
}

static void rcu_do(struct rcu_head *rh)
{
	struct rcu_head_close *rhc;
	void *p;

	switch (rh->action->type) {
	case RCUA_FREE:
		p = (char *)rh - rh->action->u.free.offset;
		if (rh->action->u.free.mt)
			qfree(rh->action->u.free.mt, p);
		else
			free(p);
		break;
	case RCUA_CLOSE:
		rhc = container_of(rh, struct rcu_head_close,
				   rcu_head);
		close(rhc->fd);
		break;
	case RCUA_CALL:
		p = (char *)rh - rh->action->u.call.offset;
		rh->action->u.call.fptr(p);
		break;

	case RCUA_INVALID:
	case RCUA_NEXT:
	case RCUA_END:
	default:
		assert(0);
	}
}
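
/* an RCUA_CALL action invokes an arbitrary destructor; a minimal sketch,
 * with "struct foo", its embedded rcu_head "rcu" and foo_fini() all being
 * hypothetical (frrcu.h's rcu_call() macro builds this for real users):
 *
 *	static void foo_fini(void *arg)
 *	{
 *		struct foo *f = arg;
 *		...
 *	}
 *	static const struct rcu_action foo_rcu_call = {
 *		.type = RCUA_CALL,
 *		.u.call = {
 *			.fptr = foo_fini,
 *			.offset = offsetof(struct foo, rcu),
 *		},
 *	};
 *
 *	rcu_enqueue(&f->rcu, &foo_rcu_call);	// while read-locked
 */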

static void rcu_watchdog(struct rcu_thread *rt)
{
#if 0
	/* future work: print a backtrace for the thread that's holding up
	 * RCU.  The only (good) way of doing that is to send a signal to the
	 * other thread, save away the backtrace in the signal handler, and
	 * block here until the signal is done processing.
	 *
	 * Just haven't implemented that yet.
	 */
	fprintf(stderr, "RCU watchdog %p\n", rt);
#endif
}

static void *rcu_main(void *arg)
{
	struct rcu_thread *rt;
	struct rcu_head *rh = NULL;
	bool end = false;
	struct timespec maxwait;

	seqlock_val_t rcuval = SEQLOCK_STARTVAL;

	pthread_setspecific(rcu_thread_key, &rcu_thread_rcu);

	while (!end) {
		seqlock_wait(&rcu_seq, rcuval);

		/* RCU watchdog timeout, TODO: configurable value */
		clock_gettime(CLOCK_MONOTONIC, &maxwait);
		maxwait.tv_nsec += 100 * 1000 * 1000;
		if (maxwait.tv_nsec >= 1000000000) {
			maxwait.tv_sec++;
			maxwait.tv_nsec -= 1000000000;
		}

		frr_each (rcu_threads, &rcu_threads, rt)
			if (!seqlock_timedwait(&rt->rcu, rcuval, &maxwait)) {
				rcu_watchdog(rt);
				seqlock_wait(&rt->rcu, rcuval);
			}

		while ((rh = rcu_heads_pop(&rcu_heads))) {
			if (rh->action->type == RCUA_NEXT)
				break;
			else if (rh->action->type == RCUA_END)
				end = true;
			else
				rcu_do(rh);
		}

		rcuval += SEQLOCK_INCR;
	}

	/* rcu_shutdown can only be called singlethreaded, and it does a
	 * pthread_join, so it should be impossible that anything ended up
	 * on the queue after RCUA_END
	 */
#if 1
	assert(!rcu_heads_first(&rcu_heads));
#else
	while ((rh = rcu_heads_pop(&rcu_heads)))
		if (rh->action->type >= RCUA_FREE)
			rcu_do(rh);
#endif
	return NULL;
}

void rcu_shutdown(void)
{
	static struct rcu_head rcu_head_end;
	struct rcu_thread *rt = rcu_self();
	void *retval;

	if (!rcu_active)
		return;

	rcu_assert_read_locked();
	assert(rcu_threads_count(&rcu_threads) == 1);

	rcu_enqueue(&rcu_head_end, &rcua_end);

	rt->depth = 0;
	seqlock_release(&rt->rcu);
	seqlock_release(&rcu_seq);
	rcu_active = false;

	/* clearing rcu_active is before pthread_join in case we hang in
	 * pthread_join & get a SIGTERM or something - in that case, just
	 * ignore the maybe-still-running RCU thread
	 */
	if (pthread_join(rcu_pthread, &retval) == 0) {
		seqlock_acquire_val(&rcu_seq, SEQLOCK_STARTVAL);
		seqlock_acquire_val(&rt->rcu, SEQLOCK_STARTVAL);
		rt->depth = 1;
	}
}

/*
 * RCU'd free functions
 */

void rcu_enqueue(struct rcu_head *rh, const struct rcu_action *action)
{
	/* refer to rcu_bump() for why we need to hold RCU when adding items
	 * to rcu_heads
	 */
	rcu_assert_read_locked();

	rh->action = action;

	if (!rcu_active) {
		rcu_do(rh);
		return;
	}
	rcu_heads_add_tail(&rcu_heads, rh);
	atomic_store_explicit(&rcu_dirty, seqlock_cur(&rcu_seq),
			      memory_order_relaxed);
}

void rcu_close(struct rcu_head_close *rhc, int fd)
{
	rhc->fd = fd;
	rcu_enqueue(&rhc->rcu_head, &rcua_close);
}
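
/* usage sketch, with a hypothetical "struct conn" owning the fd:
 *
 *	struct conn {
 *		struct rcu_head_close rcu_close;
 *		int fd;
 *	};
 *
 *	rcu_read_lock();
 *	rcu_close(&conn->rcu_close, conn->fd);	// closed after this epoch
 *	rcu_read_unlock();
 *
 * the fd stays open until every thread has left the current epoch, so
 * concurrent readers never operate on a closed (or reused) descriptor.
 */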