]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_io.c
ldpd: Convert to using LIB_ERR_XXX and zlog_ferr
[mirror_frr.git] / bgpd / bgp_io.c
CommitLineData
958b450c 1/* BGP I/O.
51abb4b4 2 * Implements packet I/O in a pthread.
958b450c 3 * Copyright (C) 2017 Cumulus Networks
51abb4b4 4 * Quentin Young
958b450c
QY
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
19 * MA 02110-1301 USA
56257a44
QY
20 */
21
95158b0c 22/* clang-format off */
42cf651e 23#include <zebra.h>
95158b0c 24#include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock
56257a44 25
95158b0c
QY
26#include "frr_pthread.h" // for frr_pthread_get, frr_pthread
27#include "linklist.h" // for list_delete, list_delete_all_node, lis...
28#include "log.h" // for zlog_debug, safe_strerror, zlog_err
29#include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE
30#include "network.h" // for ERRNO_IO_RETRY
31#include "stream.h" // for stream_get_endp, stream_getw_from, str...
74ffbfe6 32#include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_...
95158b0c
QY
33#include "thread.h" // for THREAD_OFF, THREAD_ARG, thread, thread...
34#include "zassert.h" // for assert
56257a44 35
42cf651e 36#include "bgpd/bgp_io.h"
95158b0c
QY
37#include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str
38#include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event
39#include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify...
40#include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm
41/* clang-format on */
56257a44 42
424ab01d
QY
/* Prototypes for the file-local helpers defined further down. */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Generic I/O status codes. These are bit flags; bgp_write() and bgp_read()
 * OR them into the status word they return.
 */
#define BGP_IO_TRANS_ERR (1 << 0) /* EAGAIN or similar occurred */
#define BGP_IO_FATAL_ERR (1 << 1) /* some kind of fatal TCP error */
56257a44 53
a715eab3 54/* Thread external API ----------------------------------------------------- */
56257a44 55
424ab01d 56void bgp_writes_on(struct peer *peer)
56257a44 57{
a715eab3
QY
58 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
59 assert(fpt->running);
f09a656d 60
424ab01d
QY
61 assert(peer->status != Deleted);
62 assert(peer->obuf);
63 assert(peer->ibuf);
64 assert(peer->ibuf_work);
387f984e
QY
65 assert(!peer->t_connect_check_r);
66 assert(!peer->t_connect_check_w);
424ab01d 67 assert(peer->fd);
56257a44 68
b750b0ba
QY
69 thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
70 &peer->t_write);
71 SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
424ab01d 72}
56257a44 73
424ab01d
QY
74void bgp_writes_off(struct peer *peer)
75{
151044ce 76 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
a715eab3 77 assert(fpt->running);
151044ce 78
b750b0ba
QY
79 thread_cancel_async(fpt->master, &peer->t_write, NULL);
80 THREAD_OFF(peer->t_generate_updgrp_packets);
56257a44 81
b750b0ba 82 UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
56257a44
QY
83}
84
424ab01d 85void bgp_reads_on(struct peer *peer)
56257a44 86{
a715eab3
QY
87 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
88 assert(fpt->running);
f09a656d 89
424ab01d
QY
90 assert(peer->status != Deleted);
91 assert(peer->ibuf);
92 assert(peer->fd);
93 assert(peer->ibuf_work);
424ab01d 94 assert(peer->obuf);
387f984e
QY
95 assert(!peer->t_connect_check_r);
96 assert(!peer->t_connect_check_w);
424ab01d
QY
97 assert(peer->fd);
98
b750b0ba
QY
99 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
100 &peer->t_read);
101
102 SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
56257a44
QY
103}
104
424ab01d 105void bgp_reads_off(struct peer *peer)
56257a44 106{
151044ce 107 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
a715eab3 108 assert(fpt->running);
151044ce 109
b750b0ba
QY
110 thread_cancel_async(fpt->master, &peer->t_read, NULL);
111 THREAD_OFF(peer->t_process_packet);
56257a44 112
b750b0ba 113 UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
56257a44
QY
114}
115
a715eab3 116/* Thread internal functions ----------------------------------------------- */
51abb4b4 117
a715eab3 118/*
51abb4b4 119 * Called from I/O pthread when a file descriptor has become ready for writing.
424ab01d
QY
120 */
121static int bgp_process_writes(struct thread *thread)
56257a44 122{
424ab01d
QY
123 static struct peer *peer;
124 peer = THREAD_ARG(thread);
125 uint16_t status;
b750b0ba 126 bool reschedule;
bbac44ac 127 bool fatal = false;
424ab01d
QY
128
129 if (peer->fd < 0)
130 return -1;
131
132 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
133
424ab01d 134 pthread_mutex_lock(&peer->io_mtx);
56257a44 135 {
424ab01d
QY
136 status = bgp_write(peer);
137 reschedule = (stream_fifo_head(peer->obuf) != NULL);
138 }
139 pthread_mutex_unlock(&peer->io_mtx);
56257a44 140
a715eab3
QY
141 /* no problem */
142 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
56257a44 143 }
56257a44 144
a715eab3 145 /* problem */
bbac44ac 146 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
a715eab3 147 reschedule = false;
bbac44ac
QY
148 fatal = true;
149 }
424ab01d
QY
150
151 if (reschedule) {
152 thread_add_write(fpt->master, bgp_process_writes, peer,
153 peer->fd, &peer->t_write);
b785b7ad
QY
154 } else if (!fatal) {
155 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
156 bgp_generate_updgrp_packets, 0);
424ab01d
QY
157 }
158
159 return 0;
56257a44
QY
160}
161
a715eab3 162/*
51abb4b4
QY
163 * Called from I/O pthread when a file descriptor has become ready for reading,
164 * or has hung up.
9eb217ff
QY
165 *
166 * We read as much data as possible, process as many packets as we can and
167 * place them on peer->ibuf for secondary processing by the main thread.
56257a44 168 */
424ab01d 169static int bgp_process_reads(struct thread *thread)
56257a44 170{
e11eeb8c
QY
171 /* clang-format off */
172 static struct peer *peer; // peer to read from
173 uint16_t status; // bgp_read status code
174 bool more = true; // whether we got more data
175 bool fatal = false; // whether fatal error occurred
176 bool added_pkt = false; // whether we pushed onto ->ibuf
e11eeb8c 177 /* clang-format on */
9eb217ff 178
424ab01d 179 peer = THREAD_ARG(thread);
424ab01d 180
97b4a0ec 181 if (peer->fd < 0 || bm->terminating)
424ab01d
QY
182 return -1;
183
184 struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
185
424ab01d 186 pthread_mutex_lock(&peer->io_mtx);
56257a44 187 {
424ab01d
QY
188 status = bgp_read(peer);
189 }
190 pthread_mutex_unlock(&peer->io_mtx);
191
9eb217ff
QY
192 /* error checking phase */
193 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
194 /* no problem; just don't process packets */
195 more = false;
196 }
424ab01d 197
9eb217ff
QY
198 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
199 /* problem; tear down session */
200 more = false;
201 fatal = true;
56257a44 202 }
56257a44 203
9eb217ff
QY
204 while (more) {
205 /* static buffer for transferring packets */
206 static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
207 /* shorter alias to peer's input buffer */
74ffbfe6 208 struct ringbuf *ibw = peer->ibuf_work;
9eb217ff 209 /* packet size as given by header */
74ffbfe6 210 uint16_t pktsize = 0;
9eb217ff
QY
211
212 /* check that we have enough data for a header */
74ffbfe6 213 if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
9eb217ff 214 break;
424ab01d 215
a2b6e694 216 /* check that header is valid */
217 if (!validate_header(peer)) {
9eb217ff
QY
218 fatal = true;
219 break;
424ab01d 220 }
424ab01d 221
9eb217ff 222 /* header is valid; retrieve packet size */
74ffbfe6
QY
223 ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
224
225 pktsize = ntohs(pktsize);
424ab01d 226
9eb217ff
QY
227 /* if this fails we are seriously screwed */
228 assert(pktsize <= BGP_MAX_PACKET_SIZE);
229
a715eab3
QY
230 /*
231 * If we have that much data, chuck it into its own
232 * stream and append to input queue for processing.
233 */
74ffbfe6 234 if (ringbuf_remain(ibw) >= pktsize) {
9eb217ff 235 struct stream *pkt = stream_new(pktsize);
74ffbfe6 236 assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
9eb217ff
QY
237 stream_put(pkt, pktbuf, pktsize);
238
239 pthread_mutex_lock(&peer->io_mtx);
240 {
241 stream_fifo_push(peer->ibuf, pkt);
242 }
243 pthread_mutex_unlock(&peer->io_mtx);
244
245 added_pkt = true;
246 } else
247 break;
248 }
249
74ffbfe6 250 assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
9eb217ff
QY
251
252 /* handle invalid header */
253 if (fatal) {
9eb217ff 254 /* wipe buffer just in case someone screwed up */
74ffbfe6 255 ringbuf_wipe(peer->ibuf_work);
9eb217ff 256 } else {
424ab01d
QY
257 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
258 &peer->t_read);
9eb217ff 259 if (added_pkt)
7a86aa5a
QY
260 thread_add_timer_msec(bm->master, bgp_process_packet,
261 peer, 0, &peer->t_process_packet);
9eb217ff 262 }
424ab01d
QY
263
264 return 0;
56257a44
QY
265}
266
a715eab3 267/*
56257a44
QY
268 * Flush peer output buffer.
269 *
270 * This function pops packets off of peer->obuf and writes them to peer->fd.
271 * The amount of packets written is equal to the minimum of peer->wpkt_quanta
424ab01d 272 * and the number of packets on the output buffer, unless an error occurs.
56257a44
QY
273 *
274 * If write() returns an error, the appropriate FSM event is generated.
275 *
276 * The return value is equal to the number of packets written
277 * (which may be zero).
278 */
424ab01d 279static uint16_t bgp_write(struct peer *peer)
56257a44 280{
d7c0a89a 281 uint8_t type;
56257a44
QY
282 struct stream *s;
283 int num;
284 int update_last_write = 0;
285 unsigned int count = 0;
eb2277cf 286 uint32_t uo = 0;
424ab01d 287 uint16_t status = 0;
555e09d4 288 uint32_t wpkt_quanta_old;
56257a44 289
996c9314
LB
290 wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
291 memory_order_relaxed);
555e09d4
QY
292
293 while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
56257a44
QY
294 int writenum;
295 do {
296 writenum = stream_get_endp(s) - stream_get_getp(s);
297 num = write(peer->fd, STREAM_PNT(s), writenum);
298
299 if (num < 0) {
424ab01d 300 if (!ERRNO_IO_RETRY(errno)) {
56257a44 301 BGP_EVENT_ADD(peer, TCP_fatal_error);
424ab01d
QY
302 SET_FLAG(status, BGP_IO_FATAL_ERR);
303 } else {
304 SET_FLAG(status, BGP_IO_TRANS_ERR);
305 }
56257a44
QY
306
307 goto done;
a715eab3 308 } else if (num != writenum)
56257a44
QY
309 stream_forward_getp(s, num);
310
311 } while (num != writenum);
312
313 /* Retrieve BGP packet type. */
314 stream_set_getp(s, BGP_MARKER_SIZE + 2);
315 type = stream_getc(s);
316
317 switch (type) {
318 case BGP_MSG_OPEN:
1588f6f4
QY
319 atomic_fetch_add_explicit(&peer->open_out, 1,
320 memory_order_relaxed);
56257a44
QY
321 break;
322 case BGP_MSG_UPDATE:
1588f6f4
QY
323 atomic_fetch_add_explicit(&peer->update_out, 1,
324 memory_order_relaxed);
eb2277cf 325 uo++;
56257a44
QY
326 break;
327 case BGP_MSG_NOTIFY:
1588f6f4
QY
328 atomic_fetch_add_explicit(&peer->notify_out, 1,
329 memory_order_relaxed);
56257a44
QY
330 /* Double start timer. */
331 peer->v_start *= 2;
332
333 /* Overflow check. */
334 if (peer->v_start >= (60 * 2))
335 peer->v_start = (60 * 2);
336
a715eab3
QY
337 /*
338 * Handle Graceful Restart case where the state changes
339 * to Connect instead of Idle.
340 */
56257a44
QY
341 BGP_EVENT_ADD(peer, BGP_Stop);
342 goto done;
343
344 case BGP_MSG_KEEPALIVE:
1588f6f4
QY
345 atomic_fetch_add_explicit(&peer->keepalive_out, 1,
346 memory_order_relaxed);
56257a44
QY
347 break;
348 case BGP_MSG_ROUTE_REFRESH_NEW:
349 case BGP_MSG_ROUTE_REFRESH_OLD:
1588f6f4
QY
350 atomic_fetch_add_explicit(&peer->refresh_out, 1,
351 memory_order_relaxed);
56257a44
QY
352 break;
353 case BGP_MSG_CAPABILITY:
1588f6f4
QY
354 atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1,
355 memory_order_relaxed);
56257a44
QY
356 break;
357 }
358
359 count++;
424ab01d 360
56257a44
QY
361 stream_free(stream_fifo_pop(peer->obuf));
362 update_last_write = 1;
363 }
364
365done : {
eb2277cf
LB
366 /*
367 * Update last_update if UPDATEs were written.
368 * Note: that these are only updated at end,
369 * not per message (i.e., per loop)
370 */
371 if (uo)
1588f6f4
QY
372 atomic_store_explicit(&peer->last_update, bgp_clock(),
373 memory_order_relaxed);
56257a44 374
5c075a90 375 /* If we TXed any flavor of packet */
56257a44 376 if (update_last_write)
1588f6f4
QY
377 atomic_store_explicit(&peer->last_write, bgp_clock(),
378 memory_order_relaxed);
56257a44
QY
379}
380
424ab01d
QY
381 return status;
382}
383
a715eab3 384/*
51abb4b4 385 * Reads a chunk of data from peer->fd into peer->ibuf_work.
424ab01d 386 *
51abb4b4 387 * @return status flag (see top-of-file)
424ab01d
QY
388 */
389static uint16_t bgp_read(struct peer *peer)
390{
b750b0ba
QY
391 size_t readsize; // how many bytes we want to read
392 ssize_t nbytes; // how many bytes we actually read
424ab01d 393 uint16_t status = 0;
74ffbfe6 394 static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
424ab01d 395
74ffbfe6
QY
396 readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
397 nbytes = read(peer->fd, ibw, readsize);
424ab01d 398
74ffbfe6
QY
399 /* EAGAIN or EWOULDBLOCK; come back later */
400 if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
401 SET_FLAG(status, BGP_IO_TRANS_ERR);
996c9314 402 /* Fatal error; tear down session */
74ffbfe6 403 } else if (nbytes < 0) {
85145b62
QY
404 zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
405 safe_strerror(errno));
406
407 if (peer->status == Established) {
408 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
409 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
410 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
411 } else
412 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
413 }
424ab01d 414
85145b62
QY
415 BGP_EVENT_ADD(peer, TCP_fatal_error);
416 SET_FLAG(status, BGP_IO_FATAL_ERR);
996c9314 417 /* Received EOF / TCP session closed */
74ffbfe6 418 } else if (nbytes == 0) {
85145b62
QY
419 if (bgp_debug_neighbor_events(peer))
420 zlog_debug("%s [Event] BGP connection closed fd %d",
421 peer->host, peer->fd);
422
423 if (peer->status == Established) {
424 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
425 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
426 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
427 } else
428 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
424ab01d
QY
429 }
430
85145b62
QY
431 BGP_EVENT_ADD(peer, TCP_connection_closed);
432 SET_FLAG(status, BGP_IO_FATAL_ERR);
74ffbfe6
QY
433 } else {
434 assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
435 == (size_t)nbytes);
424ab01d
QY
436 }
437
424ab01d
QY
438 return status;
439}
440
441/*
442 * Called after we have read a BGP packet header. Validates marker, message
443 * type and packet length. If any of these aren't correct, sends a notify.
74ffbfe6
QY
444 *
445 * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
446 * buffer.
424ab01d
QY
447 */
448static bool validate_header(struct peer *peer)
449{
3fe63c29
QY
450 uint16_t size;
451 uint8_t type;
74ffbfe6 452 struct ringbuf *pkt = peer->ibuf_work;
424ab01d 453
74ffbfe6
QY
454 static uint8_t m_correct[BGP_MARKER_SIZE] = {
455 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
456 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
457 uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
442c9afb 458
74ffbfe6
QY
459 if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
460 return false;
461
462 if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
442c9afb
QY
463 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
464 BGP_NOTIFY_HEADER_NOT_SYNC);
465 return false;
466 }
424ab01d 467
74ffbfe6
QY
468 /* Get size and type in network byte order. */
469 ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
470 ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
471
472 size = ntohs(size);
424ab01d
QY
473
474 /* BGP type check. */
475 if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
476 && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
477 && type != BGP_MSG_ROUTE_REFRESH_NEW
478 && type != BGP_MSG_ROUTE_REFRESH_OLD
479 && type != BGP_MSG_CAPABILITY) {
3fe63c29 480 if (bgp_debug_neighbor_events(peer))
424ab01d
QY
481 zlog_debug("%s unknown message type 0x%02x", peer->host,
482 type);
483
484 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
996c9314
LB
485 BGP_NOTIFY_HEADER_BAD_MESTYPE, &type,
486 1);
424ab01d
QY
487 return false;
488 }
489
3fe63c29 490 /* Minimum packet length check. */
424ab01d
QY
491 if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
492 || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
493 || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
494 || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)
495 || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
496 || (type == BGP_MSG_ROUTE_REFRESH_NEW
497 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
498 || (type == BGP_MSG_ROUTE_REFRESH_OLD
499 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
500 || (type == BGP_MSG_CAPABILITY
501 && size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
1588f6f4 502 if (bgp_debug_neighbor_events(peer)) {
424ab01d
QY
503 zlog_debug("%s bad message length - %d for %s",
504 peer->host, size,
505 type == 128 ? "ROUTE-REFRESH"
996c9314 506 : bgp_type_str[(int)type]);
1588f6f4 507 }
424ab01d 508
3fe63c29
QY
509 uint16_t nsize = htons(size);
510
424ab01d
QY
511 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
512 BGP_NOTIFY_HEADER_BAD_MESLEN,
996c9314 513 (unsigned char *)&nsize, 2);
424ab01d
QY
514 return false;
515 }
516
517 return true;
56257a44 518}