]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_io.c
bgpd: add peer description for each afi/safi line in show summary
[mirror_frr.git] / bgpd / bgp_io.c
1 /* BGP I/O.
2 * Implements packet I/O in a pthread.
3 * Copyright (C) 2017 Cumulus Networks
4 * Quentin Young
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
19 * MA 02110-1301 USA
20 */
21
22 /* clang-format off */
23 #include <zebra.h>
24 #include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock
25 #include <sys/uio.h> // for writev
26
27 #include "frr_pthread.h"
28 #include "linklist.h" // for list_delete, list_delete_all_node, lis...
29 #include "log.h" // for zlog_debug, safe_strerror, zlog_err
30 #include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE
31 #include "network.h" // for ERRNO_IO_RETRY
32 #include "stream.h" // for stream_get_endp, stream_getw_from, str...
33 #include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_...
34 #include "thread.h" // for THREAD_OFF, THREAD_ARG, thread...
35 #include "zassert.h" // for assert
36
37 #include "bgpd/bgp_io.h"
38 #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str
39 #include "bgpd/bgp_errors.h" // for expanded error reference information
40 #include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event
41 #include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify...
42 #include "bgpd/bgp_trace.h" // for frrtraces
43 #include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm
44 /* clang-format on */
45
/* forward declarations */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Generic I/O status codes.
 *
 * bgp_read() and bgp_write() return a bitmask assembled from these flags;
 * a value of zero means that neither condition was flagged.
 */
#define BGP_IO_TRANS_ERR (1 << 0) // EAGAIN or similar occurred
#define BGP_IO_FATAL_ERR (1 << 1) // some kind of fatal TCP error
56
57 /* Thread external API ----------------------------------------------------- */
58
59 void bgp_writes_on(struct peer *peer)
60 {
61 struct frr_pthread *fpt = bgp_pth_io;
62 assert(fpt->running);
63
64 assert(peer->status != Deleted);
65 assert(peer->obuf);
66 assert(peer->ibuf);
67 assert(peer->ibuf_work);
68 assert(!peer->t_connect_check_r);
69 assert(!peer->t_connect_check_w);
70 assert(peer->fd);
71
72 thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
73 &peer->t_write);
74 SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
75 }
76
77 void bgp_writes_off(struct peer *peer)
78 {
79 struct frr_pthread *fpt = bgp_pth_io;
80 assert(fpt->running);
81
82 thread_cancel_async(fpt->master, &peer->t_write, NULL);
83 THREAD_OFF(peer->t_generate_updgrp_packets);
84
85 UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
86 }
87
88 void bgp_reads_on(struct peer *peer)
89 {
90 struct frr_pthread *fpt = bgp_pth_io;
91 assert(fpt->running);
92
93 assert(peer->status != Deleted);
94 assert(peer->ibuf);
95 assert(peer->fd);
96 assert(peer->ibuf_work);
97 assert(peer->obuf);
98 assert(!peer->t_connect_check_r);
99 assert(!peer->t_connect_check_w);
100 assert(peer->fd);
101
102 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
103 &peer->t_read);
104
105 SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
106 }
107
108 void bgp_reads_off(struct peer *peer)
109 {
110 struct frr_pthread *fpt = bgp_pth_io;
111 assert(fpt->running);
112
113 thread_cancel_async(fpt->master, &peer->t_read, NULL);
114 THREAD_OFF(peer->t_process_packet);
115
116 UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
117 }
118
119 /* Thread internal functions ----------------------------------------------- */
120
121 /*
122 * Called from I/O pthread when a file descriptor has become ready for writing.
123 */
124 static int bgp_process_writes(struct thread *thread)
125 {
126 static struct peer *peer;
127 peer = THREAD_ARG(thread);
128 uint16_t status;
129 bool reschedule;
130 bool fatal = false;
131
132 if (peer->fd < 0)
133 return -1;
134
135 struct frr_pthread *fpt = bgp_pth_io;
136
137 frr_with_mutex(&peer->io_mtx) {
138 status = bgp_write(peer);
139 reschedule = (stream_fifo_head(peer->obuf) != NULL);
140 }
141
142 /* no problem */
143 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
144 }
145
146 /* problem */
147 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
148 reschedule = false;
149 fatal = true;
150 }
151
152 /* If suppress fib pending is enabled, route is advertised to peers when
153 * the status is received from the FIB. The delay is added
154 * to update group packet generate which will allow more routes to be
155 * sent in the update message
156 */
157 if (reschedule) {
158 thread_add_write(fpt->master, bgp_process_writes, peer,
159 peer->fd, &peer->t_write);
160 } else if (!fatal) {
161 BGP_UPDATE_GROUP_TIMER_ON(&peer->t_generate_updgrp_packets,
162 bgp_generate_updgrp_packets);
163 }
164
165 return 0;
166 }
167
168 /*
169 * Called from I/O pthread when a file descriptor has become ready for reading,
170 * or has hung up.
171 *
172 * We read as much data as possible, process as many packets as we can and
173 * place them on peer->ibuf for secondary processing by the main thread.
174 */
175 static int bgp_process_reads(struct thread *thread)
176 {
177 /* clang-format off */
178 static struct peer *peer; // peer to read from
179 uint16_t status; // bgp_read status code
180 bool more = true; // whether we got more data
181 bool fatal = false; // whether fatal error occurred
182 bool added_pkt = false; // whether we pushed onto ->ibuf
183 /* clang-format on */
184
185 peer = THREAD_ARG(thread);
186
187 if (peer->fd < 0 || bm->terminating)
188 return -1;
189
190 struct frr_pthread *fpt = bgp_pth_io;
191
192 frr_with_mutex(&peer->io_mtx) {
193 status = bgp_read(peer);
194 }
195
196 /* error checking phase */
197 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
198 /* no problem; just don't process packets */
199 more = false;
200 }
201
202 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
203 /* problem; tear down session */
204 more = false;
205 fatal = true;
206 }
207
208 while (more) {
209 /* static buffer for transferring packets */
210 /* shorter alias to peer's input buffer */
211 struct ringbuf *ibw = peer->ibuf_work;
212 /* packet size as given by header */
213 uint16_t pktsize = 0;
214
215 /* check that we have enough data for a header */
216 if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
217 break;
218
219 /* check that header is valid */
220 if (!validate_header(peer)) {
221 fatal = true;
222 break;
223 }
224
225 /* header is valid; retrieve packet size */
226 ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
227
228 pktsize = ntohs(pktsize);
229
230 /* if this fails we are seriously screwed */
231 assert(pktsize <= BGP_MAX_PACKET_SIZE);
232
233 /*
234 * If we have that much data, chuck it into its own
235 * stream and append to input queue for processing.
236 */
237 if (ringbuf_remain(ibw) >= pktsize) {
238 struct stream *pkt = stream_new(pktsize);
239 assert(STREAM_WRITEABLE(pkt) == pktsize);
240 assert(ringbuf_get(ibw, pkt->data, pktsize) == pktsize);
241 stream_set_endp(pkt, pktsize);
242
243 frrtrace(2, frr_bgp, packet_read, peer, pkt);
244 frr_with_mutex(&peer->io_mtx) {
245 stream_fifo_push(peer->ibuf, pkt);
246 }
247
248 added_pkt = true;
249 } else
250 break;
251 }
252
253 /* handle invalid header */
254 if (fatal) {
255 /* wipe buffer just in case someone screwed up */
256 ringbuf_wipe(peer->ibuf_work);
257 } else {
258 assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
259
260 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
261 &peer->t_read);
262 if (added_pkt)
263 thread_add_timer_msec(bm->master, bgp_process_packet,
264 peer, 0, &peer->t_process_packet);
265 }
266
267 return 0;
268 }
269
270 /*
271 * Flush peer output buffer.
272 *
273 * This function pops packets off of peer->obuf and writes them to peer->fd.
274 * The amount of packets written is equal to the minimum of peer->wpkt_quanta
275 * and the number of packets on the output buffer, unless an error occurs.
276 *
277 * If write() returns an error, the appropriate FSM event is generated.
278 *
279 * The return value is equal to the number of packets written
280 * (which may be zero).
281 */
282 static uint16_t bgp_write(struct peer *peer)
283 {
284 uint8_t type;
285 struct stream *s;
286 int update_last_write = 0;
287 unsigned int count;
288 uint32_t uo = 0;
289 uint16_t status = 0;
290 uint32_t wpkt_quanta_old;
291
292 int writenum = 0;
293 int num;
294 unsigned int iovsz;
295 unsigned int strmsz;
296 unsigned int total_written;
297
298 wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
299 memory_order_relaxed);
300 struct stream *ostreams[wpkt_quanta_old];
301 struct stream **streams = ostreams;
302 struct iovec iov[wpkt_quanta_old];
303
304 s = stream_fifo_head(peer->obuf);
305
306 if (!s)
307 goto done;
308
309 count = iovsz = 0;
310 while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) {
311 ostreams[iovsz] = s;
312 iov[iovsz].iov_base = stream_pnt(s);
313 iov[iovsz].iov_len = STREAM_READABLE(s);
314 writenum += STREAM_READABLE(s);
315 s = s->next;
316 ++iovsz;
317 ++count;
318 }
319
320 strmsz = iovsz;
321 total_written = 0;
322
323 do {
324 num = writev(peer->fd, iov, iovsz);
325
326 if (num < 0) {
327 if (!ERRNO_IO_RETRY(errno)) {
328 BGP_EVENT_ADD(peer, TCP_fatal_error);
329 SET_FLAG(status, BGP_IO_FATAL_ERR);
330 } else {
331 SET_FLAG(status, BGP_IO_TRANS_ERR);
332 }
333
334 break;
335 } else if (num != writenum) {
336 unsigned int msg_written = 0;
337 unsigned int ic = iovsz;
338
339 for (unsigned int i = 0; i < ic; i++) {
340 size_t ss = iov[i].iov_len;
341
342 if (ss > (unsigned int) num)
343 break;
344
345 msg_written++;
346 iovsz--;
347 writenum -= ss;
348 num -= ss;
349 }
350
351 total_written += msg_written;
352
353 assert(total_written < count);
354
355 memmove(&iov, &iov[msg_written],
356 sizeof(iov[0]) * iovsz);
357 streams = &streams[msg_written];
358 stream_forward_getp(streams[0], num);
359 iov[0].iov_base = stream_pnt(streams[0]);
360 iov[0].iov_len = STREAM_READABLE(streams[0]);
361
362 writenum -= num;
363 num = 0;
364 assert(writenum > 0);
365 } else {
366 total_written = strmsz;
367 }
368
369 } while (num != writenum);
370
371 /* Handle statistics */
372 for (unsigned int i = 0; i < total_written; i++) {
373 s = stream_fifo_pop(peer->obuf);
374
375 assert(s == ostreams[i]);
376
377 /* Retrieve BGP packet type. */
378 stream_set_getp(s, BGP_MARKER_SIZE + 2);
379 type = stream_getc(s);
380
381 switch (type) {
382 case BGP_MSG_OPEN:
383 atomic_fetch_add_explicit(&peer->open_out, 1,
384 memory_order_relaxed);
385 break;
386 case BGP_MSG_UPDATE:
387 atomic_fetch_add_explicit(&peer->update_out, 1,
388 memory_order_relaxed);
389 uo++;
390 break;
391 case BGP_MSG_NOTIFY:
392 atomic_fetch_add_explicit(&peer->notify_out, 1,
393 memory_order_relaxed);
394 /* Double start timer. */
395 peer->v_start *= 2;
396
397 /* Overflow check. */
398 if (peer->v_start >= (60 * 2))
399 peer->v_start = (60 * 2);
400
401 /*
402 * Handle Graceful Restart case where the state changes
403 * to Connect instead of Idle.
404 */
405 BGP_EVENT_ADD(peer, BGP_Stop);
406 goto done;
407
408 case BGP_MSG_KEEPALIVE:
409 atomic_fetch_add_explicit(&peer->keepalive_out, 1,
410 memory_order_relaxed);
411 break;
412 case BGP_MSG_ROUTE_REFRESH_NEW:
413 case BGP_MSG_ROUTE_REFRESH_OLD:
414 atomic_fetch_add_explicit(&peer->refresh_out, 1,
415 memory_order_relaxed);
416 break;
417 case BGP_MSG_CAPABILITY:
418 atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1,
419 memory_order_relaxed);
420 break;
421 }
422
423 stream_free(s);
424 ostreams[i] = NULL;
425 update_last_write = 1;
426 }
427
428 done : {
429 /*
430 * Update last_update if UPDATEs were written.
431 * Note: that these are only updated at end,
432 * not per message (i.e., per loop)
433 */
434 if (uo)
435 atomic_store_explicit(&peer->last_update, bgp_clock(),
436 memory_order_relaxed);
437
438 /* If we TXed any flavor of packet */
439 if (update_last_write)
440 atomic_store_explicit(&peer->last_write, bgp_clock(),
441 memory_order_relaxed);
442 }
443
444 return status;
445 }
446
447 /*
448 * Reads a chunk of data from peer->fd into peer->ibuf_work.
449 *
450 * @return status flag (see top-of-file)
451 */
452 static uint16_t bgp_read(struct peer *peer)
453 {
454 size_t readsize; // how many bytes we want to read
455 ssize_t nbytes; // how many bytes we actually read
456 uint16_t status = 0;
457 static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
458
459 readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
460 nbytes = read(peer->fd, ibw, readsize);
461
462 /* EAGAIN or EWOULDBLOCK; come back later */
463 if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
464 SET_FLAG(status, BGP_IO_TRANS_ERR);
465 /* Fatal error; tear down session */
466 } else if (nbytes < 0) {
467 flog_err(EC_BGP_UPDATE_RCV,
468 "%s [Error] bgp_read_packet error: %s", peer->host,
469 safe_strerror(errno));
470
471 if (peer->status == Established) {
472 if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
473 || CHECK_FLAG(peer->flags,
474 PEER_FLAG_GRACEFUL_RESTART_HELPER))
475 && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
476 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
477 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
478 } else
479 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
480 }
481
482 BGP_EVENT_ADD(peer, TCP_fatal_error);
483 SET_FLAG(status, BGP_IO_FATAL_ERR);
484 /* Received EOF / TCP session closed */
485 } else if (nbytes == 0) {
486 if (bgp_debug_neighbor_events(peer))
487 zlog_debug("%s [Event] BGP connection closed fd %d",
488 peer->host, peer->fd);
489
490 if (peer->status == Established) {
491 if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
492 || CHECK_FLAG(peer->flags,
493 PEER_FLAG_GRACEFUL_RESTART_HELPER))
494 && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
495 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
496 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
497 } else
498 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
499 }
500
501 BGP_EVENT_ADD(peer, TCP_connection_closed);
502 SET_FLAG(status, BGP_IO_FATAL_ERR);
503 } else {
504 assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
505 == (size_t)nbytes);
506 }
507
508 return status;
509 }
510
511 /*
512 * Called after we have read a BGP packet header. Validates marker, message
513 * type and packet length. If any of these aren't correct, sends a notify.
514 *
515 * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
516 * buffer.
517 */
518 static bool validate_header(struct peer *peer)
519 {
520 uint16_t size;
521 uint8_t type;
522 struct ringbuf *pkt = peer->ibuf_work;
523
524 static const uint8_t m_correct[BGP_MARKER_SIZE] = {
525 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
526 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
527 uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
528
529 if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
530 return false;
531
532 if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
533 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
534 BGP_NOTIFY_HEADER_NOT_SYNC);
535 return false;
536 }
537
538 /* Get size and type in network byte order. */
539 ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
540 ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
541
542 size = ntohs(size);
543
544 /* BGP type check. */
545 if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
546 && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
547 && type != BGP_MSG_ROUTE_REFRESH_NEW
548 && type != BGP_MSG_ROUTE_REFRESH_OLD
549 && type != BGP_MSG_CAPABILITY) {
550 if (bgp_debug_neighbor_events(peer))
551 zlog_debug("%s unknown message type 0x%02x", peer->host,
552 type);
553
554 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
555 BGP_NOTIFY_HEADER_BAD_MESTYPE, &type,
556 1);
557 return false;
558 }
559
560 /* Minimum packet length check. */
561 if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
562 || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
563 || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
564 || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)
565 || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
566 || (type == BGP_MSG_ROUTE_REFRESH_NEW
567 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
568 || (type == BGP_MSG_ROUTE_REFRESH_OLD
569 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
570 || (type == BGP_MSG_CAPABILITY
571 && size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
572 if (bgp_debug_neighbor_events(peer)) {
573 zlog_debug("%s bad message length - %d for %s",
574 peer->host, size,
575 type == 128 ? "ROUTE-REFRESH"
576 : bgp_type_str[(int)type]);
577 }
578
579 uint16_t nsize = htons(size);
580
581 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
582 BGP_NOTIFY_HEADER_BAD_MESLEN,
583 (unsigned char *)&nsize, 2);
584 return false;
585 }
586
587 return true;
588 }