]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_io.c
Merge pull request #2217 from donaldsharp/pim_threads
[mirror_frr.git] / bgpd / bgp_io.c
1 /* BGP I/O.
2 * Implements packet I/O in a pthread.
3 * Copyright (C) 2017 Cumulus Networks
4 * Quentin Young
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
19 * MA 02110-1301 USA
20 */
21
22 /* clang-format off */
23 #include <zebra.h>
24 #include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock
25
26 #include "frr_pthread.h" // for frr_pthread_get, frr_pthread
27 #include "linklist.h" // for list_delete, list_delete_all_node, lis...
28 #include "log.h" // for zlog_debug, safe_strerror, zlog_err
29 #include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE
30 #include "network.h" // for ERRNO_IO_RETRY
31 #include "stream.h" // for stream_get_endp, stream_getw_from, str...
32 #include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_...
33 #include "thread.h" // for THREAD_OFF, THREAD_ARG, thread, thread...
34 #include "zassert.h" // for assert
35
36 #include "bgpd/bgp_io.h"
37 #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str
38 #include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event
39 #include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify...
40 #include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm
41 /* clang-format on */
42
/* forward declarations of the file-local I/O helpers defined below */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Generic I/O status codes.
 *
 * These are bit flags set in the uint16_t status word returned by
 * bgp_read() and bgp_write().
 */
#define BGP_IO_TRANS_ERR (1 << 0) /* EAGAIN or similar occurred */
#define BGP_IO_FATAL_ERR (1 << 1) /* some kind of fatal TCP error */
53
54 /* Thread external API ----------------------------------------------------- */
55
56 void bgp_writes_on(struct peer *peer)
57 {
58 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
59 assert(fpt->running);
60
61 assert(peer->status != Deleted);
62 assert(peer->obuf);
63 assert(peer->ibuf);
64 assert(peer->ibuf_work);
65 assert(!peer->t_connect_check_r);
66 assert(!peer->t_connect_check_w);
67 assert(peer->fd);
68
69 thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
70 &peer->t_write);
71 SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
72 }
73
74 void bgp_writes_off(struct peer *peer)
75 {
76 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
77 assert(fpt->running);
78
79 thread_cancel_async(fpt->master, &peer->t_write, NULL);
80 THREAD_OFF(peer->t_generate_updgrp_packets);
81
82 UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
83 }
84
85 void bgp_reads_on(struct peer *peer)
86 {
87 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
88 assert(fpt->running);
89
90 assert(peer->status != Deleted);
91 assert(peer->ibuf);
92 assert(peer->fd);
93 assert(peer->ibuf_work);
94 assert(peer->obuf);
95 assert(!peer->t_connect_check_r);
96 assert(!peer->t_connect_check_w);
97 assert(peer->fd);
98
99 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
100 &peer->t_read);
101
102 SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
103 }
104
105 void bgp_reads_off(struct peer *peer)
106 {
107 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
108 assert(fpt->running);
109
110 thread_cancel_async(fpt->master, &peer->t_read, NULL);
111 THREAD_OFF(peer->t_process_packet);
112
113 UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
114 }
115
116 /* Thread internal functions ----------------------------------------------- */
117
118 /*
119 * Called from I/O pthread when a file descriptor has become ready for writing.
120 */
121 static int bgp_process_writes(struct thread *thread)
122 {
123 static struct peer *peer;
124 peer = THREAD_ARG(thread);
125 uint16_t status;
126 bool reschedule;
127 bool fatal = false;
128
129 if (peer->fd < 0)
130 return -1;
131
132 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
133
134 pthread_mutex_lock(&peer->io_mtx);
135 {
136 status = bgp_write(peer);
137 reschedule = (stream_fifo_head(peer->obuf) != NULL);
138 }
139 pthread_mutex_unlock(&peer->io_mtx);
140
141 /* no problem */
142 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
143 }
144
145 /* problem */
146 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
147 reschedule = false;
148 fatal = true;
149 }
150
151 if (reschedule) {
152 thread_add_write(fpt->master, bgp_process_writes, peer,
153 peer->fd, &peer->t_write);
154 } else if (!fatal) {
155 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
156 bgp_generate_updgrp_packets, 0);
157 }
158
159 return 0;
160 }
161
162 /*
163 * Called from I/O pthread when a file descriptor has become ready for reading,
164 * or has hung up.
165 *
166 * We read as much data as possible, process as many packets as we can and
167 * place them on peer->ibuf for secondary processing by the main thread.
168 */
169 static int bgp_process_reads(struct thread *thread)
170 {
171 /* clang-format off */
172 static struct peer *peer; // peer to read from
173 uint16_t status; // bgp_read status code
174 bool more = true; // whether we got more data
175 bool fatal = false; // whether fatal error occurred
176 bool added_pkt = false; // whether we pushed onto ->ibuf
177 bool header_valid = true; // whether header is valid
178 /* clang-format on */
179
180 peer = THREAD_ARG(thread);
181
182 if (peer->fd < 0 || bm->terminating)
183 return -1;
184
185 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
186
187 pthread_mutex_lock(&peer->io_mtx);
188 {
189 status = bgp_read(peer);
190 }
191 pthread_mutex_unlock(&peer->io_mtx);
192
193 /* error checking phase */
194 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
195 /* no problem; just don't process packets */
196 more = false;
197 }
198
199 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
200 /* problem; tear down session */
201 more = false;
202 fatal = true;
203 }
204
205 while (more) {
206 /* static buffer for transferring packets */
207 static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
208 /* shorter alias to peer's input buffer */
209 struct ringbuf *ibw = peer->ibuf_work;
210 /* packet size as given by header */
211 uint16_t pktsize = 0;
212
213 /* check that we have enough data for a header */
214 if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
215 break;
216
217 /* validate header */
218 header_valid = validate_header(peer);
219
220 if (!header_valid) {
221 fatal = true;
222 break;
223 }
224
225 /* header is valid; retrieve packet size */
226 ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
227
228 pktsize = ntohs(pktsize);
229
230 /* if this fails we are seriously screwed */
231 assert(pktsize <= BGP_MAX_PACKET_SIZE);
232
233 /*
234 * If we have that much data, chuck it into its own
235 * stream and append to input queue for processing.
236 */
237 if (ringbuf_remain(ibw) >= pktsize) {
238 struct stream *pkt = stream_new(pktsize);
239 assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
240 stream_put(pkt, pktbuf, pktsize);
241
242 pthread_mutex_lock(&peer->io_mtx);
243 {
244 stream_fifo_push(peer->ibuf, pkt);
245 }
246 pthread_mutex_unlock(&peer->io_mtx);
247
248 added_pkt = true;
249 } else
250 break;
251 }
252
253 assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
254
255 /* handle invalid header */
256 if (fatal) {
257 /* wipe buffer just in case someone screwed up */
258 ringbuf_wipe(peer->ibuf_work);
259 } else {
260 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
261 &peer->t_read);
262 if (added_pkt)
263 thread_add_timer_msec(bm->master, bgp_process_packet,
264 peer, 0, &peer->t_process_packet);
265 }
266
267 return 0;
268 }
269
270 /*
271 * Flush peer output buffer.
272 *
273 * This function pops packets off of peer->obuf and writes them to peer->fd.
274 * The amount of packets written is equal to the minimum of peer->wpkt_quanta
275 * and the number of packets on the output buffer, unless an error occurs.
276 *
277 * If write() returns an error, the appropriate FSM event is generated.
278 *
279 * The return value is equal to the number of packets written
280 * (which may be zero).
281 */
282 static uint16_t bgp_write(struct peer *peer)
283 {
284 uint8_t type;
285 struct stream *s;
286 int num;
287 int update_last_write = 0;
288 unsigned int count = 0;
289 uint32_t uo = 0;
290 uint16_t status = 0;
291 uint32_t wpkt_quanta_old;
292
293 wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
294 memory_order_relaxed);
295
296 while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
297 int writenum;
298 do {
299 writenum = stream_get_endp(s) - stream_get_getp(s);
300 num = write(peer->fd, STREAM_PNT(s), writenum);
301
302 if (num < 0) {
303 if (!ERRNO_IO_RETRY(errno)) {
304 BGP_EVENT_ADD(peer, TCP_fatal_error);
305 SET_FLAG(status, BGP_IO_FATAL_ERR);
306 } else {
307 SET_FLAG(status, BGP_IO_TRANS_ERR);
308 }
309
310 goto done;
311 } else if (num != writenum)
312 stream_forward_getp(s, num);
313
314 } while (num != writenum);
315
316 /* Retrieve BGP packet type. */
317 stream_set_getp(s, BGP_MARKER_SIZE + 2);
318 type = stream_getc(s);
319
320 switch (type) {
321 case BGP_MSG_OPEN:
322 atomic_fetch_add_explicit(&peer->open_out, 1,
323 memory_order_relaxed);
324 break;
325 case BGP_MSG_UPDATE:
326 atomic_fetch_add_explicit(&peer->update_out, 1,
327 memory_order_relaxed);
328 uo++;
329 break;
330 case BGP_MSG_NOTIFY:
331 atomic_fetch_add_explicit(&peer->notify_out, 1,
332 memory_order_relaxed);
333 /* Double start timer. */
334 peer->v_start *= 2;
335
336 /* Overflow check. */
337 if (peer->v_start >= (60 * 2))
338 peer->v_start = (60 * 2);
339
340 /*
341 * Handle Graceful Restart case where the state changes
342 * to Connect instead of Idle.
343 */
344 BGP_EVENT_ADD(peer, BGP_Stop);
345 goto done;
346
347 case BGP_MSG_KEEPALIVE:
348 atomic_fetch_add_explicit(&peer->keepalive_out, 1,
349 memory_order_relaxed);
350 break;
351 case BGP_MSG_ROUTE_REFRESH_NEW:
352 case BGP_MSG_ROUTE_REFRESH_OLD:
353 atomic_fetch_add_explicit(&peer->refresh_out, 1,
354 memory_order_relaxed);
355 break;
356 case BGP_MSG_CAPABILITY:
357 atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1,
358 memory_order_relaxed);
359 break;
360 }
361
362 count++;
363
364 stream_free(stream_fifo_pop(peer->obuf));
365 update_last_write = 1;
366 }
367
368 done : {
369 /*
370 * Update last_update if UPDATEs were written.
371 * Note: that these are only updated at end,
372 * not per message (i.e., per loop)
373 */
374 if (uo)
375 atomic_store_explicit(&peer->last_update, bgp_clock(),
376 memory_order_relaxed);
377
378 /* If we TXed any flavor of packet */
379 if (update_last_write)
380 atomic_store_explicit(&peer->last_write, bgp_clock(),
381 memory_order_relaxed);
382 }
383
384 return status;
385 }
386
387 /*
388 * Reads a chunk of data from peer->fd into peer->ibuf_work.
389 *
390 * @return status flag (see top-of-file)
391 */
392 static uint16_t bgp_read(struct peer *peer)
393 {
394 size_t readsize; // how many bytes we want to read
395 ssize_t nbytes; // how many bytes we actually read
396 uint16_t status = 0;
397 static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
398
399 readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
400 nbytes = read(peer->fd, ibw, readsize);
401
402 /* EAGAIN or EWOULDBLOCK; come back later */
403 if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
404 SET_FLAG(status, BGP_IO_TRANS_ERR);
405 /* Fatal error; tear down session */
406 } else if (nbytes < 0) {
407 zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
408 safe_strerror(errno));
409
410 if (peer->status == Established) {
411 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
412 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
413 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
414 } else
415 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
416 }
417
418 BGP_EVENT_ADD(peer, TCP_fatal_error);
419 SET_FLAG(status, BGP_IO_FATAL_ERR);
420 /* Received EOF / TCP session closed */
421 } else if (nbytes == 0) {
422 if (bgp_debug_neighbor_events(peer))
423 zlog_debug("%s [Event] BGP connection closed fd %d",
424 peer->host, peer->fd);
425
426 if (peer->status == Established) {
427 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
428 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
429 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
430 } else
431 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
432 }
433
434 BGP_EVENT_ADD(peer, TCP_connection_closed);
435 SET_FLAG(status, BGP_IO_FATAL_ERR);
436 } else {
437 assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
438 == (size_t)nbytes);
439 }
440
441 return status;
442 }
443
444 /*
445 * Called after we have read a BGP packet header. Validates marker, message
446 * type and packet length. If any of these aren't correct, sends a notify.
447 *
448 * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
449 * buffer.
450 */
451 static bool validate_header(struct peer *peer)
452 {
453 uint16_t size;
454 uint8_t type;
455 struct ringbuf *pkt = peer->ibuf_work;
456
457 static uint8_t m_correct[BGP_MARKER_SIZE] = {
458 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
459 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
460 uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
461
462 if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
463 return false;
464
465 if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
466 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
467 BGP_NOTIFY_HEADER_NOT_SYNC);
468 return false;
469 }
470
471 /* Get size and type in network byte order. */
472 ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
473 ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
474
475 size = ntohs(size);
476
477 /* BGP type check. */
478 if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
479 && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
480 && type != BGP_MSG_ROUTE_REFRESH_NEW
481 && type != BGP_MSG_ROUTE_REFRESH_OLD
482 && type != BGP_MSG_CAPABILITY) {
483 if (bgp_debug_neighbor_events(peer))
484 zlog_debug("%s unknown message type 0x%02x", peer->host,
485 type);
486
487 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
488 BGP_NOTIFY_HEADER_BAD_MESTYPE, &type,
489 1);
490 return false;
491 }
492
493 /* Minimum packet length check. */
494 if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
495 || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
496 || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
497 || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)
498 || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
499 || (type == BGP_MSG_ROUTE_REFRESH_NEW
500 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
501 || (type == BGP_MSG_ROUTE_REFRESH_OLD
502 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
503 || (type == BGP_MSG_CAPABILITY
504 && size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
505 if (bgp_debug_neighbor_events(peer)) {
506 zlog_debug("%s bad message length - %d for %s",
507 peer->host, size,
508 type == 128 ? "ROUTE-REFRESH"
509 : bgp_type_str[(int)type]);
510 }
511
512 uint16_t nsize = htons(size);
513
514 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
515 BGP_NOTIFY_HEADER_BAD_MESLEN,
516 (unsigned char *)&nsize, 2);
517 return false;
518 }
519
520 return true;
521 }