// SPDX-License-Identifier: GPL-2.0-or-later
/* BGP Keepalives.
 * Implements a producer thread to generate BGP keepalives for peers.
 * Copyright (C) 2017 Cumulus Networks, Inc.
 * Quentin Young
 */

/* clang-format off */
#include <zebra.h>
#include <pthread.h>		// for pthread_mutex_lock, pthread_mutex_unlock

#include "frr_pthread.h"	// for frr_pthread
#include "hash.h"		// for hash, hash_clean, hash_create_size...
#include "log.h"		// for zlog_debug
#include "memory.h"		// for MTYPE_TMP, XFREE, XCALLOC, XMALLOC
#include "monotime.h"		// for monotime, monotime_since

#include "bgpd/bgpd.h"		// for peer, PEER_EVENT_KEEPALIVES_ON, peer...
#include "bgpd/bgp_debug.h"	// for bgp_debug_neighbor_events
#include "bgpd/bgp_packet.h"	// for bgp_keepalive_send
#include "bgpd/bgp_keepalives.h"
/* clang-format on */

DEFINE_MTYPE_STATIC(BGPD, BGP_PKAT, "Peer KeepAlive Timer");
DEFINE_MTYPE_STATIC(BGPD, BGP_COND, "BGP Peer pthread Conditional");
DEFINE_MTYPE_STATIC(BGPD, BGP_MUTEX, "BGP Peer pthread Mutex");

/*
 * Peer KeepAlive Timer.
 * Associates a peer with the time of its last keepalive.
 */
struct pkat {
	/* the peer to send keepalives to */
	struct peer *peer;
	/* absolute time of last keepalive sent */
	struct timeval last;
};
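
/*
 * Lifecycle note: a pkat is allocated in bgp_keepalives_on() when a peer is
 * first registered, and freed either in bgp_keepalives_off() or wholesale at
 * pthread teardown in bgp_keepalives_finish().
 */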

/* List of peers we are sending keepalives for, and associated mutex. */
static pthread_mutex_t *peerhash_mtx;
static pthread_cond_t *peerhash_cond;
static struct hash *peerhash;
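
/*
 * Locking discipline, as the code below applies it: all reads and writes of
 * peerhash happen under peerhash_mtx, and peerhash_cond is signalled
 * whenever the peer set changes or the pthread is asked to stop.
 */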

static struct pkat *pkat_new(struct peer *peer)
{
	struct pkat *pkat = XMALLOC(MTYPE_BGP_PKAT, sizeof(struct pkat));
	pkat->peer = peer;
	monotime(&pkat->last);
	return pkat;
}

static void pkat_del(void *pkat)
{
	XFREE(MTYPE_BGP_PKAT, pkat);
}

/*
 * Callback for hash_iterate. Determines if a peer needs a keepalive and, if
 * so, generates and sends it.
 *
 * For any given peer, if the elapsed time since its last keepalive exceeds
 * its configured keepalive timer, a keepalive is sent to the peer and its
 * last-sent time is reset. Additionally, if the elapsed time does not exceed
 * the configured keepalive timer, but the time until the next keepalive is
 * due falls within a hardcoded tolerance, a keepalive is sent as if the
 * configured timer had been exceeded. Doing this helps alleviate nanosecond
 * sleeps between ticks by grouping together peers who are due for keepalives
 * at roughly the same time. The tolerance value is arbitrarily chosen to be
 * 100ms.
 *
 * In addition, this function calculates the maximum amount of time that the
 * keepalive pthread can sleep before another tick needs to take place. This
 * is equivalent to the shortest time until a keepalive is due for any one
 * peer. That maximum wait is written back through the arg pointer as a
 * struct timeval (0 if infinity); the function itself returns nothing.
 */
static void peer_process(struct hash_bucket *hb, void *arg)
{
	struct pkat *pkat = hb->data;

	struct timeval *next_update = arg;

	static struct timeval elapsed;	// elapsed time since keepalive
	static struct timeval ka = {0}; // peer->v_keepalive as a timeval
	static struct timeval diff;	// ka - elapsed

	static const struct timeval tolerance = {0, 100000};

	uint32_t v_ka = atomic_load_explicit(&pkat->peer->v_keepalive,
					     memory_order_relaxed);

	/* 0 keepalive timer means no keepalives */
	if (v_ka == 0)
		return;

	/* calculate elapsed time since last keepalive */
	monotime_since(&pkat->last, &elapsed);

	/* calculate difference between elapsed time and configured time */
	ka.tv_sec = v_ka;
	timersub(&ka, &elapsed, &diff);

	int send_keepalive =
		elapsed.tv_sec >= ka.tv_sec || timercmp(&diff, &tolerance, <);

	if (send_keepalive) {
		if (bgp_debug_keepalive(pkat->peer))
			zlog_debug("%s [FSM] Timer (keepalive timer expire)",
				   pkat->peer->host);

		bgp_keepalive_send(pkat->peer);
		monotime(&pkat->last);
		memset(&elapsed, 0, sizeof(elapsed));
		diff = ka;
	}

	/* if calculated next update for this peer < current delay, use it */
	if (next_update->tv_sec < 0 || timercmp(&diff, next_update, <))
		*next_update = diff;
}
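
/*
 * Worked example of the tolerance check above (illustrative numbers, not
 * taken from any real session): with v_keepalive = 60s and elapsed = 59.95s,
 * diff = ka - elapsed = 0.05s, which is under the 100ms tolerance, so the
 * keepalive goes out 50ms early instead of scheduling one more wakeup:
 *
 *	elapsed.tv_sec >= ka.tv_sec	// 59 >= 60 -> false
 *	timercmp(&diff, &tolerance, <)	// 0.05s < 0.1s -> true
 *
 * i.e. sub-tolerance timer accuracy is traded for fewer wakeups of this
 * pthread when many peers are due at nearly the same time.
 */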

static bool peer_hash_cmp(const void *f, const void *s)
{
	const struct pkat *p1 = f;
	const struct pkat *p2 = s;

	return p1->peer == p2->peer;
}

static unsigned int peer_hash_key(const void *arg)
{
	const struct pkat *pkat = arg;
	return (uintptr_t)pkat->peer;
}
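
/*
 * Both hash callbacks above key pkat entries on the peer pointer itself:
 * each peer has at most one entry, so pointer identity suffices for equality
 * and the pointer value for the hash.
 */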

/* Cleanup handler / deinitializer. */
static void bgp_keepalives_finish(void *arg)
{
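	/*
	 * This runs via pthread_cleanup_pop(1) at the bottom of
	 * bgp_keepalives_start(), at which point the pthread still holds
	 * peerhash_mtx; hence the unlock here before destroying it.
	 */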
	hash_clean_and_free(&peerhash, pkat_del);

	pthread_mutex_unlock(peerhash_mtx);
	pthread_mutex_destroy(peerhash_mtx);
	pthread_cond_destroy(peerhash_cond);

	XFREE(MTYPE_BGP_MUTEX, peerhash_mtx);
	XFREE(MTYPE_BGP_COND, peerhash_cond);
}

/*
 * Entry function for peer keepalive generation pthread.
 */
void *bgp_keepalives_start(void *arg)
{
	struct frr_pthread *fpt = arg;
	fpt->master->owner = pthread_self();

	struct timeval currtime = {0, 0};
	struct timeval aftertime = {0, 0};
	struct timeval next_update = {0, 0};
	struct timespec next_update_ts = {0, 0};

	/*
	 * The RCU mechanism for each pthread is initialized in a "locked"
	 * state. That's fine for pthreads using the frr_pthread / event_fetch
	 * event loop, because that event loop unlocks regularly. For foreign
	 * pthreads like this one, the lock needs to be dropped so that the
	 * background RCU pthread can run.
	 */
	rcu_read_unlock();

	peerhash_mtx = XCALLOC(MTYPE_BGP_MUTEX, sizeof(pthread_mutex_t));
	peerhash_cond = XCALLOC(MTYPE_BGP_COND, sizeof(pthread_cond_t));

	/* initialize mutex */
	pthread_mutex_init(peerhash_mtx, NULL);

	/*
	 * use the monotonic clock with the condition variable so that
	 * pthread_cond_timedwait() deadlines are unaffected by wall-clock
	 * adjustments
	 */
	pthread_condattr_t attrs;
	pthread_condattr_init(&attrs);
	pthread_condattr_setclock(&attrs, CLOCK_MONOTONIC);
	pthread_cond_init(peerhash_cond, &attrs);
	pthread_condattr_destroy(&attrs);

	/*
	 * We are not using the normal FRR pthread mechanics here and are not
	 * using fpt_run; set the OS thread name ourselves.
	 */
	frr_pthread_set_name(fpt);

	/* initialize peer hashtable */
	peerhash = hash_create_size(2048, peer_hash_key, peer_hash_cmp, NULL);
	pthread_mutex_lock(peerhash_mtx);

	/* register cleanup handler */
	pthread_cleanup_push(&bgp_keepalives_finish, NULL);

	/* notify anybody waiting on us that we are done starting up */
	frr_pthread_notify_running(fpt);
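
	/*
	 * Sleep strategy: while at least one peer is registered, wait on the
	 * condition variable only until the earliest computed deadline
	 * (next_update_ts); with no peers, block indefinitely until
	 * bgp_keepalives_on() or bgp_keepalives_stop() signals the condvar.
	 */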
	while (atomic_load_explicit(&fpt->running, memory_order_relaxed)) {
		if (peerhash->count > 0)
			pthread_cond_timedwait(peerhash_cond, peerhash_mtx,
					       &next_update_ts);
		else
			while (peerhash->count == 0
			       && atomic_load_explicit(&fpt->running,
						       memory_order_relaxed))
				pthread_cond_wait(peerhash_cond, peerhash_mtx);

		monotime(&currtime);

		next_update.tv_sec = -1;

		hash_iterate(peerhash, peer_process, &next_update);
		if (next_update.tv_sec == -1)
			memset(&next_update, 0, sizeof(next_update));

		monotime_since(&currtime, &aftertime);

		timeradd(&currtime, &next_update, &next_update);
		TIMEVAL_TO_TIMESPEC(&next_update, &next_update_ts);
	}

	/* clean up */
	pthread_cleanup_pop(1);

	return NULL;
}

/* --- thread external functions ------------------------------------------- */

void bgp_keepalives_on(struct peer *peer)
{
	if (CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON))
		return;

	struct frr_pthread *fpt = bgp_pth_ka;
	assert(fpt->running);

	/* placeholder bucket data to use for fast key lookups */
	static struct pkat holder = {0};

	/*
	 * We need to ensure that bgp_keepalives_start has set up the
	 * peerhash and its mutex before this is called
	 */
	assert(peerhash_mtx);

	frr_with_mutex (peerhash_mtx) {
		holder.peer = peer;
		if (!hash_lookup(peerhash, &holder)) {
			struct pkat *pkat = pkat_new(peer);
			(void)hash_get(peerhash, pkat, hash_alloc_intern);
			peer_lock(peer);
		}
		SET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON);
		/* Force the keepalive thread to wake up */
		pthread_cond_signal(peerhash_cond);
	}
}
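
/*
 * Presumably called from the BGP FSM once a session reaches Established
 * (peer_process() skips peers whose v_keepalive is 0 regardless).
 * Registration is idempotent: the PEER_THREAD_KEEPALIVES_ON flag and the
 * hash_lookup() check keep a peer from being added twice.
 */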

void bgp_keepalives_off(struct peer *peer)
{
	if (!CHECK_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON))
		return;

	struct frr_pthread *fpt = bgp_pth_ka;
	assert(fpt->running);

	/* placeholder bucket data to use for fast key lookups */
	static struct pkat holder = {0};

	/*
	 * We need to ensure that bgp_keepalives_start has set up the
	 * peerhash and its mutex before this is called
	 */
	assert(peerhash_mtx);

	frr_with_mutex (peerhash_mtx) {
		holder.peer = peer;
		struct pkat *res = hash_release(peerhash, &holder);
		if (res) {
			pkat_del(res);
			peer_unlock(peer);
		}
		UNSET_FLAG(peer->thread_flags, PEER_THREAD_KEEPALIVES_ON);
	}
}
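
/*
 * Assumed to be registered as the frr_pthread stop callback for this
 * pthread (see the sketch at the end of this file); the pthread must
 * already be running when it is called, hence the assert.
 */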

int bgp_keepalives_stop(struct frr_pthread *fpt, void **result)
{
	assert(fpt->running);

	frr_with_mutex (peerhash_mtx) {
		atomic_store_explicit(&fpt->running, false,
				      memory_order_relaxed);
		pthread_cond_signal(peerhash_cond);
	}

	pthread_join(fpt->thread, result);
	return 0;
}
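
/*
 * A minimal sketch (an assumption, not code from this file) of how this
 * pthread would be wired up with the frr_pthread API; the real setup lives
 * elsewhere in bgpd, and the exact call sites may differ:
 *
 *	static const struct frr_pthread_attr attr = {
 *		.start = bgp_keepalives_start,
 *		.stop = bgp_keepalives_stop,
 *	};
 *
 *	bgp_pth_ka = frr_pthread_new(&attr, "BGP Keepalives thread",
 *				     "bgpd_ka");
 *	frr_pthread_run(bgp_pth_ka, NULL);
 *	// returns once frr_pthread_notify_running() fires in the start fn
 *	frr_pthread_wait_running(bgp_pth_ka);
 */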