]>
Commit | Line | Data |
---|---|---|
acddc0ed | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
5c0c651c QY |
2 | /* BGP Keepalives. |
3 | * Implements a producer thread to generate BGP keepalives for peers. | |
4 | * Copyright (C) 2017 Cumulus Networks, Inc. | |
5 | * Quentin Young | |
03014d48 | 6 | */ |
5c0c651c | 7 | |
6ee8ea1c | 8 | /* clang-format off */ |
03014d48 | 9 | #include <zebra.h> |
6ee8ea1c | 10 | #include <pthread.h> // for pthread_mutex_lock, pthread_mutex_unlock |
03014d48 | 11 | |
6ee8ea1c QY |
12 | #include "frr_pthread.h" // for frr_pthread |
13 | #include "hash.h" // for hash, hash_clean, hash_create_size... | |
14 | #include "log.h" // for zlog_debug | |
15 | #include "memory.h" // for MTYPE_TMP, XFREE, XCALLOC, XMALLOC | |
16 | #include "monotime.h" // for monotime, monotime_since | |
03014d48 | 17 | |
e16d030c | 18 | #include "bgpd/bgpd.h" // for peer, PEER_EVENT_KEEPALIVES_ON, peer... |
6ee8ea1c QY |
19 | #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events |
20 | #include "bgpd/bgp_packet.h" // for bgp_keepalive_send | |
03014d48 | 21 | #include "bgpd/bgp_keepalives.h" |
6ee8ea1c | 22 | /* clang-format on */ |
03014d48 | 23 | |
19a713be DS |
24 | DEFINE_MTYPE_STATIC(BGPD, BGP_PKAT, "Peer KeepAlive Timer"); |
25 | DEFINE_MTYPE_STATIC(BGPD, BGP_COND, "BGP Peer pthread Conditional"); | |
26 | DEFINE_MTYPE_STATIC(BGPD, BGP_MUTEX, "BGP Peer pthread Mutex"); | |
27 | ||
/*
 * Peer KeepAlive Timer.
 *
 * One of these exists per peer that keepalives are being generated for. It
 * pairs the peer with the timestamp of the most recent keepalive.
 */
struct pkat {
	/* peer that keepalives are sent to; also serves as the hash key */
	struct peer *peer;
	/* monotime() timestamp taken when the last keepalive was sent */
	struct timeval last;
};
38 | ||
/*
 * Set of peers we are sending keepalives for (struct pkat entries keyed by
 * peer pointer), the mutex guarding it, and the condition variable used to
 * wake the keepalive pthread. The mutex and condition variable are allocated
 * when the keepalive pthread starts and released by its cleanup handler.
 */
static pthread_mutex_t *peerhash_mtx;
static pthread_cond_t *peerhash_cond;
static struct hash *peerhash;
03014d48 | 43 | |
03014d48 QY |
44 | static struct pkat *pkat_new(struct peer *peer) |
45 | { | |
19a713be | 46 | struct pkat *pkat = XMALLOC(MTYPE_BGP_PKAT, sizeof(struct pkat)); |
03014d48 QY |
47 | pkat->peer = peer; |
48 | monotime(&pkat->last); | |
49 | return pkat; | |
50 | } | |
51 | ||
52 | static void pkat_del(void *pkat) | |
53 | { | |
19a713be | 54 | XFREE(MTYPE_BGP_PKAT, pkat); |
03014d48 | 55 | } |
03014d48 | 56 | |
bd8b71e4 | 57 | |
03014d48 | 58 | /* |
424ab01d QY |
59 | * Callback for hash_iterate. Determines if a peer needs a keepalive and if so, |
60 | * generates and sends it. | |
03014d48 QY |
61 | * |
62 | * For any given peer, if the elapsed time since its last keepalive exceeds its | |
63 | * configured keepalive timer, a keepalive is sent to the peer and its | |
64 | * last-sent time is reset. Additionally, If the elapsed time does not exceed | |
65 | * the configured keepalive timer, but the time until the next keepalive is due | |
66 | * is within a hardcoded tolerance, a keepalive is sent as if the configured | |
67 | * timer was exceeded. Doing this helps alleviate nanosecond sleeps between | |
68 | * ticks by grouping together peers who are due for keepalives at roughly the | |
69 | * same time. This tolerance value is arbitrarily chosen to be 100ms. | |
70 | * | |
71 | * In addition, this function calculates the maximum amount of time that the | |
72 | * keepalive thread can sleep before another tick needs to take place. This is | |
73 | * equivalent to shortest time until a keepalive is due for any one peer. | |
74 | * | |
75 | * @return maximum time to wait until next update (0 if infinity) | |
76 | */ | |
e3b78da8 | 77 | static void peer_process(struct hash_bucket *hb, void *arg) |
03014d48 | 78 | { |
bd8b71e4 QY |
79 | struct pkat *pkat = hb->data; |
80 | ||
81 | struct timeval *next_update = arg; | |
03014d48 | 82 | |
03014d48 QY |
83 | static struct timeval elapsed; // elapsed time since keepalive |
84 | static struct timeval ka = {0}; // peer->v_keepalive as a timeval | |
85 | static struct timeval diff; // ka - elapsed | |
86 | ||
2b64873d | 87 | static const struct timeval tolerance = {0, 100000}; |
03014d48 | 88 | |
bfc18a02 QY |
89 | uint32_t v_ka = atomic_load_explicit(&pkat->peer->v_keepalive, |
90 | memory_order_relaxed); | |
91 | ||
92 | /* 0 keepalive timer means no keepalives */ | |
93 | if (v_ka == 0) | |
94 | return; | |
95 | ||
a715eab3 | 96 | /* calculate elapsed time since last keepalive */ |
bd8b71e4 | 97 | monotime_since(&pkat->last, &elapsed); |
03014d48 | 98 | |
a715eab3 | 99 | /* calculate difference between elapsed time and configured time */ |
bfc18a02 | 100 | ka.tv_sec = v_ka; |
bd8b71e4 | 101 | timersub(&ka, &elapsed, &diff); |
03014d48 | 102 | |
bd8b71e4 QY |
103 | int send_keepalive = |
104 | elapsed.tv_sec >= ka.tv_sec || timercmp(&diff, &tolerance, <); | |
03014d48 | 105 | |
bd8b71e4 | 106 | if (send_keepalive) { |
3ffec403 | 107 | if (bgp_debug_keepalive(pkat->peer)) |
bd8b71e4 QY |
108 | zlog_debug("%s [FSM] Timer (keepalive timer expire)", |
109 | pkat->peer->host); | |
03014d48 | 110 | |
bd8b71e4 QY |
111 | bgp_keepalive_send(pkat->peer); |
112 | monotime(&pkat->last); | |
6006b807 | 113 | memset(&elapsed, 0, sizeof(elapsed)); |
a715eab3 | 114 | diff = ka; |
03014d48 QY |
115 | } |
116 | ||
a715eab3 | 117 | /* if calculated next update for this peer < current delay, use it */ |
2ccf91b1 | 118 | if (next_update->tv_sec < 0 || timercmp(&diff, next_update, <)) |
bd8b71e4 QY |
119 | *next_update = diff; |
120 | } | |
121 | ||
74df8d6d | 122 | static bool peer_hash_cmp(const void *f, const void *s) |
bd8b71e4 QY |
123 | { |
124 | const struct pkat *p1 = f; | |
125 | const struct pkat *p2 = s; | |
74df8d6d | 126 | |
bd8b71e4 QY |
127 | return p1->peer == p2->peer; |
128 | } | |
129 | ||
d8b87afe | 130 | static unsigned int peer_hash_key(const void *arg) |
bd8b71e4 | 131 | { |
d8b87afe | 132 | const struct pkat *pkat = arg; |
bd8b71e4 | 133 | return (uintptr_t)pkat->peer; |
03014d48 QY |
134 | } |
135 | ||
a715eab3 | 136 | /* Cleanup handler / deinitializer. */ |
b72b6f4f | 137 | static void bgp_keepalives_finish(void *arg) |
419dfe6a | 138 | { |
d8bc11a5 | 139 | hash_clean_and_free(&peerhash, pkat_del); |
419dfe6a | 140 | |
bd8b71e4 QY |
141 | pthread_mutex_unlock(peerhash_mtx); |
142 | pthread_mutex_destroy(peerhash_mtx); | |
143 | pthread_cond_destroy(peerhash_cond); | |
419dfe6a | 144 | |
19a713be DS |
145 | XFREE(MTYPE_BGP_MUTEX, peerhash_mtx); |
146 | XFREE(MTYPE_BGP_COND, peerhash_cond); | |
419dfe6a QY |
147 | } |
148 | ||
a715eab3 | 149 | /* |
419dfe6a | 150 | * Entry function for peer keepalive generation pthread. |
419dfe6a | 151 | */ |
b72b6f4f | 152 | void *bgp_keepalives_start(void *arg) |
419dfe6a | 153 | { |
a715eab3 QY |
154 | struct frr_pthread *fpt = arg; |
155 | fpt->master->owner = pthread_self(); | |
156 | ||
419dfe6a | 157 | struct timeval currtime = {0, 0}; |
bd8b71e4 | 158 | struct timeval aftertime = {0, 0}; |
419dfe6a QY |
159 | struct timeval next_update = {0, 0}; |
160 | struct timespec next_update_ts = {0, 0}; | |
161 | ||
85ba04f3 MS |
162 | /* |
163 | * The RCU mechanism for each pthread is initialized in a "locked" | |
164 | * state. That's ok for pthreads using the frr_pthread, | |
de2754be | 165 | * event_fetch event loop, because that event loop unlocks regularly. |
85ba04f3 MS |
166 | * For foreign pthreads, the lock needs to be unlocked so that the |
167 | * background rcu pthread can run. | |
168 | */ | |
169 | rcu_read_unlock(); | |
170 | ||
19a713be DS |
171 | peerhash_mtx = XCALLOC(MTYPE_BGP_MUTEX, sizeof(pthread_mutex_t)); |
172 | peerhash_cond = XCALLOC(MTYPE_BGP_COND, sizeof(pthread_cond_t)); | |
a715eab3 QY |
173 | |
174 | /* initialize mutex */ | |
175 | pthread_mutex_init(peerhash_mtx, NULL); | |
176 | ||
177 | /* use monotonic clock with condition variable */ | |
178 | pthread_condattr_t attrs; | |
179 | pthread_condattr_init(&attrs); | |
180 | pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC); | |
181 | pthread_cond_init(peerhash_cond, &attrs); | |
182 | pthread_condattr_destroy(&attrs); | |
183 | ||
c80bedb8 DS |
184 | /* |
185 | * We are not using normal FRR pthread mechanics and are | |
186 | * not using fpt_run | |
187 | */ | |
188 | frr_pthread_set_name(fpt); | |
a9198bc1 | 189 | |
a715eab3 QY |
190 | /* initialize peer hashtable */ |
191 | peerhash = hash_create_size(2048, peer_hash_key, peer_hash_cmp, NULL); | |
bd8b71e4 | 192 | pthread_mutex_lock(peerhash_mtx); |
03014d48 | 193 | |
a715eab3 | 194 | /* register cleanup handler */ |
b72b6f4f | 195 | pthread_cleanup_push(&bgp_keepalives_finish, NULL); |
03014d48 | 196 | |
a715eab3 QY |
197 | /* notify anybody waiting on us that we are done starting up */ |
198 | frr_pthread_notify_running(fpt); | |
03014d48 | 199 | |
a715eab3 | 200 | while (atomic_load_explicit(&fpt->running, memory_order_relaxed)) { |
bd8b71e4 QY |
201 | if (peerhash->count > 0) |
202 | pthread_cond_timedwait(peerhash_cond, peerhash_mtx, | |
03014d48 QY |
203 | &next_update_ts); |
204 | else | |
bd8b71e4 | 205 | while (peerhash->count == 0 |
a715eab3 QY |
206 | && atomic_load_explicit(&fpt->running, |
207 | memory_order_relaxed)) | |
bd8b71e4 | 208 | pthread_cond_wait(peerhash_cond, peerhash_mtx); |
03014d48 QY |
209 | |
210 | monotime(&currtime); | |
bd8b71e4 QY |
211 | |
212 | next_update.tv_sec = -1; | |
213 | ||
214 | hash_iterate(peerhash, peer_process, &next_update); | |
215 | if (next_update.tv_sec == -1) | |
6006b807 | 216 | memset(&next_update, 0, sizeof(next_update)); |
bd8b71e4 QY |
217 | |
218 | monotime_since(&currtime, &aftertime); | |
219 | ||
03014d48 QY |
220 | timeradd(&currtime, &next_update, &next_update); |
221 | TIMEVAL_TO_TIMESPEC(&next_update, &next_update_ts); | |
222 | } | |
223 | ||
a715eab3 | 224 | /* clean up */ |
03014d48 QY |
225 | pthread_cleanup_pop(1); |
226 | ||
227 | return NULL; | |
228 | } | |
229 | ||
230 | /* --- thread external functions ------------------------------------------- */ | |
231 | ||
b72b6f4f | 232 | void bgp_keepalives_on(struct peer *peer) |
03014d48 | 233 | { |
096476dd QY |
234 | if (CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON)) |
235 | return; | |
236 | ||
1ac267a2 | 237 | struct frr_pthread *fpt = bgp_pth_ka; |
a715eab3 QY |
238 | assert(fpt->running); |
239 | ||
bd8b71e4 QY |
240 | /* placeholder bucket data to use for fast key lookups */ |
241 | static struct pkat holder = {0}; | |
242 | ||
68ede9c4 DS |
243 | /* |
244 | * We need to ensure that bgp_keepalives_init was called first | |
245 | */ | |
246 | assert(peerhash_mtx); | |
934af458 | 247 | |
cb1991af | 248 | frr_with_mutex (peerhash_mtx) { |
bd8b71e4 QY |
249 | holder.peer = peer; |
250 | if (!hash_lookup(peerhash, &holder)) { | |
251 | struct pkat *pkat = pkat_new(peer); | |
8e3aae66 | 252 | (void)hash_get(peerhash, pkat, hash_alloc_intern); |
bd8b71e4 QY |
253 | peer_lock(peer); |
254 | } | |
49507a6f | 255 | SET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON); |
8c9d306c SB |
256 | /* Force the keepalive thread to wake up */ |
257 | pthread_cond_signal(peerhash_cond); | |
03014d48 | 258 | } |
03014d48 QY |
259 | } |
260 | ||
b72b6f4f | 261 | void bgp_keepalives_off(struct peer *peer) |
03014d48 | 262 | { |
096476dd QY |
263 | if (!CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON)) |
264 | return; | |
265 | ||
1ac267a2 | 266 | struct frr_pthread *fpt = bgp_pth_ka; |
a715eab3 QY |
267 | assert(fpt->running); |
268 | ||
bd8b71e4 QY |
269 | /* placeholder bucket data to use for fast key lookups */ |
270 | static struct pkat holder = {0}; | |
49507a6f | 271 | |
68ede9c4 DS |
272 | /* |
273 | * We need to ensure that bgp_keepalives_init was called first | |
274 | */ | |
275 | assert(peerhash_mtx); | |
934af458 | 276 | |
cb1991af | 277 | frr_with_mutex (peerhash_mtx) { |
bd8b71e4 QY |
278 | holder.peer = peer; |
279 | struct pkat *res = hash_release(peerhash, &holder); | |
280 | if (res) { | |
281 | pkat_del(res); | |
282 | peer_unlock(peer); | |
283 | } | |
49507a6f | 284 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON); |
03014d48 | 285 | } |
03014d48 QY |
286 | } |
287 | ||
a715eab3 | 288 | int bgp_keepalives_stop(struct frr_pthread *fpt, void **result) |
0ca8b79f | 289 | { |
a715eab3 QY |
290 | assert(fpt->running); |
291 | ||
8c9d306c SB |
292 | frr_with_mutex (peerhash_mtx) { |
293 | atomic_store_explicit(&fpt->running, false, | |
294 | memory_order_relaxed); | |
295 | pthread_cond_signal(peerhash_cond); | |
296 | } | |
a715eab3 | 297 | |
0ca8b79f QY |
298 | pthread_join(fpt->thread, result); |
299 | return 0; | |
300 | } |