-/*
- BGP I/O.
-
- Implements packet I/O in a consumer pthread.
- --------------------------------------------
- Copyright (C) 2017 Cumulus Networks
- Quentin Young
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; see the file COPYING; if not, write to the
- Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
- MA 02110-1301 USA
+/* BGP I/O.
+ * Implements packet I/O in a pthread.
+ * Copyright (C) 2017 Cumulus Networks
+ * Quentin Young
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+ * MA 02110-1301 USA
*/
+/* clang-format off */
#include <zebra.h>
-#include <sys/time.h>
-#include <pthread.h>
-
-#include "thread.h"
-#include "hash.h"
-#include "stream.h"
-#include "memory.h"
-#include "log.h"
-#include "monotime.h"
-#include "network.h"
-#include "pqueue.h"
-
-#include "bgpd/bgpd.h"
+#include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock
+
+#include "frr_pthread.h" // for frr_pthread_get, frr_pthread
+#include "linklist.h" // for list_delete, list_delete_all_node, lis...
+#include "log.h" // for zlog_debug, safe_strerror, zlog_err
+#include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE
+#include "network.h" // for ERRNO_IO_RETRY
+#include "stream.h" // for stream_get_endp, stream_getw_from, str...
+#include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_...
+#include "thread.h" // for THREAD_OFF, THREAD_ARG, thread, thread...
+#include "zassert.h" // for assert
+
#include "bgpd/bgp_io.h"
-#include "bgpd/bgp_debug.h"
-#include "bgpd/bgp_packet.h"
-#include "bgpd/bgp_fsm.h"
+#include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str
+#include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event
+#include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify...
+#include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm
+/* clang-format on */
/* forward declarations */
static uint16_t bgp_write(struct peer *);
static bool validate_header(struct peer *);
/* generic i/o status codes */
-#define BGP_IO_TRANS_ERR (1 << 1) // EAGAIN or similar occurred
-#define BGP_IO_FATAL_ERR (1 << 2) // some kind of fatal TCP error
-
-/* bgp_read() status codes */
-#define BGP_IO_READ_HEADER (1 << 3) // when read a full packet header
-#define BGP_IO_READ_FULLPACKET (1 << 4) // read a full packet
+#define BGP_IO_TRANS_ERR (1 << 0) // EAGAIN or similar occurred
+#define BGP_IO_FATAL_ERR (1 << 1) // some kind of fatal TCP error
-/* Start and stop routines for I/O pthread + control variables
+/* Plumbing & control variables for thread lifecycle
* ------------------------------------------------------------------------ */
-bool bgp_packet_write_thread_run = false;
-pthread_mutex_t *work_mtx;
+bool bgp_io_thread_run;
+pthread_mutex_t *running_cond_mtx;
+pthread_cond_t *running_cond;
+
+/* No-op callback for thread_add_read(): ignores its argument and returns 0.
+ * bgp_io_start() registers it on the read end of a pipe; nothing in the
+ * visible code ever writes to that pipe. */
+static int bgp_io_dummy(struct thread *thread) { return 0; }
-static struct list *read_cancel;
-static struct list *write_cancel;
+/* Poison pill task: clears bgp_io_thread_run, which is the loop condition of
+ * the task loop in bgp_io_start(). Queued by bgp_io_stop(). Returns 0. */
+static int bgp_io_finish(struct thread *thread)
+{
+ bgp_io_thread_run = false;
+ return 0;
+}
+/* Extern lifecycle control functions. init -> start -> stop
+ * ------------------------------------------------------------------------ */
+/* Clears the run flag, then allocates and initializes running_cond_mtx and
+ * running_cond. Returns with running_cond_mtx still locked. */
void bgp_io_init()
{
- work_mtx = XCALLOC(MTYPE_TMP, sizeof(pthread_mutex_t));
- pthread_mutex_init(work_mtx, NULL);
+ bgp_io_thread_run = false;
- read_cancel = list_new();
- write_cancel = list_new();
+ running_cond_mtx = XCALLOC(MTYPE_PTHREAD_PRIM, sizeof(pthread_mutex_t));
+ running_cond = XCALLOC(MTYPE_PTHREAD_PRIM, sizeof(pthread_cond_t));
+
+ pthread_mutex_init(running_cond_mtx, NULL);
+ pthread_cond_init(running_cond, NULL);
+
+ /* Deliberately left locked on return: bgp_io_wait_running() waits on
+ * running_cond with this mutex and then unlocks it. */
+ pthread_mutex_lock(running_cond_mtx);
}
+/* Entry point of the I/O pthread.
+ *
+ * Records itself as owner of the I/O thread master, signals running_cond
+ * once bgp_io_thread_run is set, then fetches and runs tasks until the flag
+ * is cleared (see bgp_io_finish()). The argument is not used; the return
+ * value is always NULL. */
void *bgp_io_start(void *arg)
{
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+ fpt->master->owner = pthread_self();
+
+ // fd so we can sleep in poll()
+ int sleeper[2] = {-1, -1};
+
+ /* pipe() can fail (e.g. when descriptors are exhausted). Never hand
+ * unset descriptors to the scheduler or to close(); log and carry on
+ * without the sleeper fd instead.
+ * NOTE(review): without it the loop below may not block in
+ * thread_fetch() while idle - confirm against thread_fetch(). */
+ if (pipe(sleeper) < 0) {
+ zlog_err("%s: pipe() failed: %s", __func__, safe_strerror(errno));
+ } else {
+ thread_add_read(fpt->master, &bgp_io_dummy, NULL, sleeper[0],
+ NULL);
+ }
// we definitely don't want to handle signals
fpt->master->handle_signals = false;
- bgp_packet_write_thread_run = true;
struct thread task;
- while (bgp_packet_write_thread_run) {
+ /* publish "running" under the mutex that bgp_io_wait_running() waits on */
+ pthread_mutex_lock(running_cond_mtx);
+ {
+ bgp_io_thread_run = true;
+ pthread_cond_signal(running_cond);
+ }
+ pthread_mutex_unlock(running_cond_mtx);
+
+ while (bgp_io_thread_run) {
if (thread_fetch(fpt->master, &task)) {
- pthread_mutex_lock(work_mtx);
- {
- bool cancel = false;
- struct peer *peer = THREAD_ARG(&task);
- if ((task.func == bgp_process_reads
- && listnode_lookup(read_cancel, peer))
- || (task.func == bgp_process_writes
- && listnode_lookup(write_cancel, peer)))
- cancel = true;
-
- list_delete_all_node(write_cancel);
- list_delete_all_node(read_cancel);
-
- if (!cancel)
- thread_call(&task);
- }
- pthread_mutex_unlock(work_mtx);
+ thread_call(&task);
}
}
+ /* only close what pipe() actually gave us */
+ if (sleeper[1] >= 0)
+ close(sleeper[1]);
+ if (sleeper[0] >= 0)
+ close(sleeper[0]);
+
return NULL;
}
+/* Blocks until the I/O pthread has set bgp_io_thread_run, then releases
+ * running_cond_mtx. Expects running_cond_mtx to be locked on entry; in the
+ * visible code that lock is taken at the end of bgp_io_init(). */
+void bgp_io_wait_running()
+{
+ /* loop guards against wakeups that arrive before the flag is set */
+ while (!bgp_io_thread_run)
+ pthread_cond_wait(running_cond, running_cond_mtx);
+
+ /* locked in bgp_io_init() */
+ pthread_mutex_unlock(running_cond_mtx);
+}
+
+/* Stops the I/O pthread and releases the lifecycle primitives.
+ *
+ * Queues bgp_io_finish() on fpt->master, joins fpt->thread (its exit value is
+ * stored through result by pthread_join()), then destroys and frees
+ * running_cond_mtx and running_cond. Always returns 0. */
int bgp_io_stop(void **result, struct frr_pthread *fpt)
{
- fpt->master->spin = false;
- bgp_packet_write_thread_run = false;
- pthread_kill(fpt->thread, SIGINT);
+ /* the run flag is cleared by a task executed on the I/O thread itself */
+ thread_add_event(fpt->master, &bgp_io_finish, NULL, 0, NULL);
pthread_join(fpt->thread, result);
- pthread_mutex_unlock(work_mtx);
- pthread_mutex_destroy(work_mtx);
+ pthread_mutex_destroy(running_cond_mtx);
+ pthread_cond_destroy(running_cond);
+
+ XFREE(MTYPE_PTHREAD_PRIM, running_cond_mtx);
+ XFREE(MTYPE_PTHREAD_PRIM, running_cond);
- list_delete(read_cancel);
- list_delete(write_cancel);
- XFREE(MTYPE_TMP, work_mtx);
return 0;
}
-/* ------------------------------------------------------------------------ */
+
+/* Extern API -------------------------------------------------------------- */
+/* Starts write processing for a peer: schedules bgp_process_writes() for
+ * peer->fd on the I/O thread master and sets PEER_THREAD_WRITES_ON.
+ *
+ * The asserts require that the I/O thread is running, that the peer is not
+ * Deleted, that obuf, ibuf and ibuf_work exist and that neither connect-check
+ * task is pending.
+ * NOTE(review): assert(peer->fd) only rejects fd 0; a negative fd passes. */
void bgp_writes_on(struct peer *peer)
{
+ assert(bgp_io_thread_run);
+
assert(peer->status != Deleted);
assert(peer->obuf);
assert(peer->ibuf);
assert(peer->ibuf_work);
- assert(!peer->t_connect_check);
+ assert(!peer->t_connect_check_r);
+ assert(!peer->t_connect_check_w);
assert(peer->fd);
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
- pthread_mutex_lock(work_mtx);
- {
- listnode_delete(write_cancel, peer);
- thread_add_write(fpt->master, bgp_process_writes, peer,
- peer->fd, &peer->t_write);
- SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
- }
- pthread_mutex_unlock(work_mtx);
+ thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
+ &peer->t_write);
+ SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
}
+/* Stops write processing for a peer: asks the I/O thread master to cancel
+ * peer->t_write, turns off peer->t_generate_updgrp_packets and clears
+ * PEER_THREAD_WRITES_ON. Asserts that the I/O thread is running.
+ * NOTE(review): whether thread_cancel_async() waits for the cancellation to
+ * take effect is not visible here - confirm before relying on it. */
void bgp_writes_off(struct peer *peer)
{
- pthread_mutex_lock(work_mtx);
- {
- THREAD_OFF(peer->t_write);
- THREAD_OFF(peer->t_generate_updgrp_packets);
- listnode_add(write_cancel, peer);
+ assert(bgp_io_thread_run);
- // peer access by us after this point will result in pain
- UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
- }
- pthread_mutex_unlock(work_mtx);
- /* upon return, i/o thread must not access the peer */
+ struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+
+ thread_cancel_async(fpt->master, &peer->t_write, NULL);
+ THREAD_OFF(peer->t_generate_updgrp_packets);
+
+ UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
}
+/* Starts read processing for a peer: schedules bgp_process_reads() for
+ * peer->fd on the I/O thread master and sets PEER_THREAD_READS_ON.
+ *
+ * The asserts require that the I/O thread is running, that the peer is not
+ * Deleted, that ibuf, ibuf_work and obuf exist and that neither connect-check
+ * task is pending.
+ * NOTE(review): assert(peer->fd) appears twice and only rejects fd 0; a
+ * negative fd passes. */
void bgp_reads_on(struct peer *peer)
{
+ assert(bgp_io_thread_run);
+
assert(peer->status != Deleted);
assert(peer->ibuf);
assert(peer->fd);
assert(peer->ibuf_work);
- assert(stream_get_endp(peer->ibuf_work) == 0);
assert(peer->obuf);
- assert(!peer->t_connect_check);
+ assert(!peer->t_connect_check_r);
+ assert(!peer->t_connect_check_w);
assert(peer->fd);
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
- pthread_mutex_lock(work_mtx);
- {
- listnode_delete(read_cancel, peer);
- thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
- &peer->t_read);
- SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
- }
- pthread_mutex_unlock(work_mtx);
+ thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
+ &peer->t_read);
+
+ SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}
+/* Stops read processing for a peer: asks the I/O thread master to cancel
+ * peer->t_read, turns off peer->t_process_packet and clears
+ * PEER_THREAD_READS_ON. Asserts that the I/O thread is running.
+ * NOTE(review): whether thread_cancel_async() waits for the cancellation to
+ * take effect is not visible here - confirm before relying on it. */
void bgp_reads_off(struct peer *peer)
{
- pthread_mutex_lock(work_mtx);
- {
- THREAD_OFF(peer->t_read);
- THREAD_OFF(peer->t_process_packet);
- listnode_add(read_cancel, peer);
+ assert(bgp_io_thread_run);
- // peer access by us after this point will result in pain
- UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
- }
- pthread_mutex_unlock(work_mtx);
+ struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+
+ thread_cancel_async(fpt->master, &peer->t_read, NULL);
+ THREAD_OFF(peer->t_process_packet);
+
+ UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}
+/* Internal functions ------------------------------------------------------- */
+
/**
- * Called from PTHREAD_IO when select() or poll() determines that the file
- * descriptor is ready to be written to.
+ * Called from I/O pthread when a file descriptor has become ready for writing.
*/
static int bgp_process_writes(struct thread *thread)
{
static struct peer *peer;
peer = THREAD_ARG(thread);
uint16_t status;
+ bool reschedule;
+ bool fatal = false;
if (peer->fd < 0)
return -1;
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
- bool reschedule;
pthread_mutex_lock(&peer->io_mtx);
{
status = bgp_write(peer);
if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { /* no problem */
}
- if (CHECK_FLAG(status, BGP_IO_FATAL_ERR))
- reschedule = 0; // problem
+ if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
+ reschedule = false; /* problem */
+ fatal = true;
+ }
if (reschedule) {
thread_add_write(fpt->master, bgp_process_writes, peer,
peer->fd, &peer->t_write);
- thread_add_background(bm->master, bgp_generate_updgrp_packets,
- peer, 0,
- &peer->t_generate_updgrp_packets);
+ } else if (!fatal) {
+ BGP_TIMER_ON(peer->t_generate_updgrp_packets,
+ bgp_generate_updgrp_packets, 0);
}
return 0;
}
/**
- * Called from PTHREAD_IO when select() or poll() determines that the file
- * descriptor is ready to be read from.
+ * Called from I/O pthread when a file descriptor has become ready for reading,
+ * or has hung up.
+ *
+ * We read as much data as possible, process as many packets as we can and
+ * place them on peer->ibuf for secondary processing by the main thread.
+ *
+ * On a fatal read error or an invalid header the work buffer is wiped and the
+ * read task is not re-armed. Otherwise the read task is re-armed and, if at
+ * least one packet was queued, bgp_process_packet is scheduled on bm->master.
+ *
+ * @return -1 if peer->fd is negative, 0 otherwise
*/
static int bgp_process_reads(struct thread *thread)
{
- static struct peer *peer;
+ /* clang-format off */
+ static struct peer *peer; // peer to read from
+ uint16_t status; // bgp_read status code
+ bool more = true; // whether we got more data
+ bool fatal = false; // whether fatal error occurred
+ bool added_pkt = false; // whether we pushed onto ->ibuf
+ bool header_valid = true; // whether header is valid
+ /* clang-format on */
+
peer = THREAD_ARG(thread);
- uint16_t status;
if (peer->fd < 0)
return -1;
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
- bool reschedule = true;
-
- // execute read
pthread_mutex_lock(&peer->io_mtx);
{
status = bgp_read(peer);
}
pthread_mutex_unlock(&peer->io_mtx);
- // check results of read
- bool header_valid = true;
+ /* error checking phase */
+ if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
+ /* no problem; just don't process packets */
+ more = false;
+ }
- if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { /* no problem */
+ if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
+ /* problem; tear down session */
+ more = false;
+ fatal = true;
}
- if (CHECK_FLAG(status, BGP_IO_FATAL_ERR))
- reschedule = false; // problem
+ while (more) {
+ /* static buffer for transferring packets */
+ static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
+ /* shorter alias to peer's input buffer */
+ struct ringbuf *ibw = peer->ibuf_work;
+ /* packet size as given by header */
+ uint16_t pktsize = 0;
+
+ /* check that we have enough data for a header */
+ if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
+ break;
- if (CHECK_FLAG(status, BGP_IO_READ_HEADER)) {
+ /* validate header */
header_valid = validate_header(peer);
+
if (!header_valid) {
- bgp_size_t packetsize =
- MIN((int)stream_get_endp(peer->ibuf_work),
- BGP_MAX_PACKET_SIZE);
- memcpy(peer->last_reset_cause, peer->ibuf_work->data,
- packetsize);
- peer->last_reset_cause_size = packetsize;
- // We're tearing the session down, no point in
- // rescheduling.
- // Additionally, bgp_read() will use the TLV if it's
- // present to
- // determine how much to read; if this is corrupt, we'll
- // crash the
- // program.
- reschedule = false;
+ fatal = true;
+ break;
}
- }
- // if we read a full packet, push it onto peer->ibuf, reset our WiP
- // buffer
- // and schedule a job to process it on the main thread
- if (header_valid && CHECK_FLAG(status, BGP_IO_READ_FULLPACKET)) {
- pthread_mutex_lock(&peer->io_mtx);
- {
- stream_fifo_push(peer->ibuf,
- stream_dup(peer->ibuf_work));
- }
- pthread_mutex_unlock(&peer->io_mtx);
- stream_reset(peer->ibuf_work);
- assert(stream_get_endp(peer->ibuf_work) == 0);
+ /* header is valid; retrieve packet size */
+ ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
+
+ pktsize = ntohs(pktsize);
+
+ /* if this fails we are seriously screwed */
+ assert(pktsize <= BGP_MAX_PACKET_SIZE);
- thread_add_background(bm->master, bgp_process_packet, peer, 0,
- &peer->t_process_packet);
+ /* If we have that much data, chuck it into its own
+ * stream and append to input queue for processing. */
+ if (ringbuf_remain(ibw) >= pktsize) {
+ struct stream *pkt = stream_new(pktsize);
+ /* Dequeue outside assert(): the copy is a required side
+ * effect and must survive builds where assertions
+ * compile to nothing. */
+ size_t copied = ringbuf_get(ibw, pktbuf, pktsize);
+ assert(copied == pktsize);
+ (void)copied;
+ stream_put(pkt, pktbuf, pktsize);
+
+ pthread_mutex_lock(&peer->io_mtx);
+ {
+ stream_fifo_push(peer->ibuf, pkt);
+ }
+ pthread_mutex_unlock(&peer->io_mtx);
+
+ added_pkt = true;
+ } else
+ break;
}
- if (reschedule)
+ assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
+
+ /* fatal covers both a fatal read status and an invalid header */
+ if (fatal) {
+ /* wipe buffer just in case someone screwed up */
+ ringbuf_wipe(peer->ibuf_work);
+ } else {
thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
&peer->t_read);
+ if (added_pkt)
+ thread_add_timer_msec(bm->master, bgp_process_packet,
+ peer, 0, &peer->t_process_packet);
+ }
return 0;
}
int num;
int update_last_write = 0;
unsigned int count = 0;
- unsigned int oc = 0;
+ uint32_t uo = 0;
uint16_t status = 0;
+ uint32_t wpkt_quanta_old;
- while (count < peer->bgp->wpkt_quanta
- && (s = stream_fifo_head(peer->obuf))) {
+ // cache current write quanta
+ wpkt_quanta_old =
+ atomic_load_explicit(&peer->bgp->wpkt_quanta, memory_order_relaxed);
+
+ while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
int writenum;
do {
writenum = stream_get_endp(s) - stream_get_getp(s);
switch (type) {
case BGP_MSG_OPEN:
- peer->open_out++;
+ atomic_fetch_add_explicit(&peer->open_out, 1,
+ memory_order_relaxed);
break;
case BGP_MSG_UPDATE:
- peer->update_out++;
+ atomic_fetch_add_explicit(&peer->update_out, 1,
+ memory_order_relaxed);
+ uo++;
break;
case BGP_MSG_NOTIFY:
- peer->notify_out++;
+ atomic_fetch_add_explicit(&peer->notify_out, 1,
+ memory_order_relaxed);
/* Double start timer. */
peer->v_start *= 2;
peer->v_start = (60 * 2);
/* Handle Graceful Restart case where the state changes
- to
- Connect instead of Idle */
- /* Flush any existing events */
+ * to Connect instead of Idle */
BGP_EVENT_ADD(peer, BGP_Stop);
goto done;
case BGP_MSG_KEEPALIVE:
- peer->keepalive_out++;
+ atomic_fetch_add_explicit(&peer->keepalive_out, 1,
+ memory_order_relaxed);
break;
case BGP_MSG_ROUTE_REFRESH_NEW:
case BGP_MSG_ROUTE_REFRESH_OLD:
- peer->refresh_out++;
+ atomic_fetch_add_explicit(&peer->refresh_out, 1,
+ memory_order_relaxed);
break;
case BGP_MSG_CAPABILITY:
- peer->dynamic_cap_out++;
+ atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1,
+ memory_order_relaxed);
break;
}
}
done : {
- /* Update last_update if UPDATEs were written. */
- if (peer->update_out > oc)
- peer->last_update = bgp_clock();
-
- /* If we TXed any flavor of packet update last_write */
+ /*
+ * Update last_update if UPDATEs were written.
+ * Note that these are only updated at the end,
+ * not per message (i.e., not per loop iteration).
+ */
+ if (uo)
+ atomic_store_explicit(&peer->last_update, bgp_clock(),
+ memory_order_relaxed);
+
+ /* If we TXed any flavor of packet */
if (update_last_write)
- peer->last_write = bgp_clock();
+ atomic_store_explicit(&peer->last_write, bgp_clock(),
+ memory_order_relaxed);
}
return status;
}
/**
- * Reads <= 1 packet worth of data from peer->fd into peer->ibuf_work.
+ * Reads a chunk of data from peer->fd into peer->ibuf_work.
+ *
+ * A single read() is issued for at most the smaller of the free space in
+ * peer->ibuf_work and the size of the static staging buffer.
*
- * @return whether a full packet was read
+ * @return 0 when data was stored; BGP_IO_TRANS_ERR when read() failed with a
+ * retryable errno; BGP_IO_FATAL_ERR on any other read() failure or on EOF
+ * (a matching FSM event is queued in both fatal cases)
*/
static uint16_t bgp_read(struct peer *peer)
{
- int readsize; // how many bytes we want to read
- int nbytes; // how many bytes we actually read
- bool have_header = false;
+ size_t readsize; // how many bytes we want to read
+ ssize_t nbytes; // how many bytes we actually read
uint16_t status = 0;
-
- if (stream_get_endp(peer->ibuf_work) < BGP_HEADER_SIZE)
- readsize = BGP_HEADER_SIZE - stream_get_endp(peer->ibuf_work);
- else {
- // retrieve packet length from tlv and compute # bytes we still
- // need
- u_int16_t mlen =
- stream_getw_from(peer->ibuf_work, BGP_MARKER_SIZE);
- readsize = mlen - stream_get_endp(peer->ibuf_work);
- have_header = true;
- }
-
- nbytes = stream_read_try(peer->ibuf_work, peer->fd, readsize);
-
- if (nbytes <= 0) // handle errors
- {
- switch (nbytes) {
- case -1: // fatal error; tear down the session
- zlog_err("%s [Error] bgp_read_packet error: %s",
- peer->host, safe_strerror(errno));
-
- if (peer->status == Established) {
- if (CHECK_FLAG(peer->sflags,
- PEER_STATUS_NSF_MODE)) {
- peer->last_reset =
- PEER_DOWN_NSF_CLOSE_SESSION;
- SET_FLAG(peer->sflags,
- PEER_STATUS_NSF_WAIT);
- } else
- peer->last_reset =
- PEER_DOWN_CLOSE_SESSION;
- }
-
- BGP_EVENT_ADD(peer, TCP_fatal_error);
- SET_FLAG(status, BGP_IO_FATAL_ERR);
- break;
-
- case 0: // TCP session closed
- if (bgp_debug_neighbor_events(peer))
- zlog_debug(
- "%s [Event] BGP connection closed fd %d",
- peer->host, peer->fd);
-
- if (peer->status == Established) {
- if (CHECK_FLAG(peer->sflags,
- PEER_STATUS_NSF_MODE)) {
- peer->last_reset =
- PEER_DOWN_NSF_CLOSE_SESSION;
- SET_FLAG(peer->sflags,
- PEER_STATUS_NSF_WAIT);
- } else
- peer->last_reset =
- PEER_DOWN_CLOSE_SESSION;
- }
-
- BGP_EVENT_ADD(peer, TCP_connection_closed);
- SET_FLAG(status, BGP_IO_FATAL_ERR);
- break;
-
- case -2: // temporary error; come back later
- SET_FLAG(status, BGP_IO_TRANS_ERR);
- break;
- default:
- break;
+ static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
+
+ /* NOTE(review): a readsize of 0 makes read() return 0, which is handled
+ * below as EOF; this relies on ibuf_work never being full here. */
+ readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
+ nbytes = read(peer->fd, ibw, readsize);
+
+ /* EAGAIN or EWOULDBLOCK; come back later */
+ if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
+ SET_FLAG(status, BGP_IO_TRANS_ERR);
+ /* Fatal error; tear down session */
+ } else if (nbytes < 0) {
+ zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
+ safe_strerror(errno));
+
+ if (peer->status == Established) {
+ if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
+ peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
+ SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
+ } else
+ peer->last_reset = PEER_DOWN_CLOSE_SESSION;
}
- return status;
- }
+ BGP_EVENT_ADD(peer, TCP_fatal_error);
+ SET_FLAG(status, BGP_IO_FATAL_ERR);
+ /* Received EOF / TCP session closed */
+ } else if (nbytes == 0) {
+ if (bgp_debug_neighbor_events(peer))
+ zlog_debug("%s [Event] BGP connection closed fd %d",
+ peer->host, peer->fd);
+
+ if (peer->status == Established) {
+ if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
+ peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
+ SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
+ } else
+ peer->last_reset = PEER_DOWN_CLOSE_SESSION;
+ }
- // If we didn't have the header before read(), and now we do, set the
- // appropriate flag. The caller must validate the header for us.
- if (!have_header
- && stream_get_endp(peer->ibuf_work) >= BGP_HEADER_SIZE) {
- SET_FLAG(status, BGP_IO_READ_HEADER);
- have_header = true;
+ BGP_EVENT_ADD(peer, TCP_connection_closed);
+ SET_FLAG(status, BGP_IO_FATAL_ERR);
+ } else {
+ /* Store outside assert(): the copy into the ring buffer is a
+ * required side effect and must survive builds where
+ * assertions compile to nothing. */
+ size_t stored = ringbuf_put(peer->ibuf_work, ibw, nbytes);
+ assert(stored == (size_t)nbytes);
+ (void)stored;
}
- // If we read the # of bytes specified in the tlv, we have read a full
- // packet.
- //
- // Note that the header may not have been validated here. This flag
- // means
- // ONLY that we read the # of bytes specified in the header; if the
- // header is
- // not valid, the packet MUST NOT be processed further.
- if (have_header && (stream_getw_from(peer->ibuf_work, BGP_MARKER_SIZE)
- == stream_get_endp(peer->ibuf_work)))
- SET_FLAG(status, BGP_IO_READ_FULLPACKET);
return status;
}
/*
* Called after we have read a BGP packet header. Validates marker, message
* type and packet length. If any of these aren't correct, sends a notify.
+ *
+ * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
+ * buffer.
*/
static bool validate_header(struct peer *peer)
{
- u_int16_t size, type;
-
- /* Marker check */
- for (int i = 0; i < BGP_MARKER_SIZE; i++)
- if (peer->ibuf_work->data[i] != 0xff) {
- bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
- BGP_NOTIFY_HEADER_NOT_SYNC);
- return false;
- }
+ uint16_t size;
+ uint8_t type;
+ struct ringbuf *pkt = peer->ibuf_work;
- /* Get size and type. */
- size = stream_getw_from(peer->ibuf_work, BGP_MARKER_SIZE);
- type = stream_getc_from(peer->ibuf_work, BGP_MARKER_SIZE + 2);
+ static uint8_t m_correct[BGP_MARKER_SIZE] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
+
+ if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
+ return false;
+
+ if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
+ bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
+ BGP_NOTIFY_HEADER_NOT_SYNC);
+ return false;
+ }
+
+ /* Get size and type in network byte order. */
+ ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
+ ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
+
+ size = ntohs(size);
/* BGP type check. */
if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
BGP_NOTIFY_HEADER_BAD_MESTYPE,
- (u_char *)&type, 1);
+ &type, 1);
return false;
}
- /* Mimimum packet length check. */
+ /* Minimum packet length check. */
if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
|| (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
|| (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
&& size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
|| (type == BGP_MSG_CAPABILITY
&& size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
- if (bgp_debug_neighbor_events(peer))
+ if (bgp_debug_neighbor_events(peer)) {
zlog_debug("%s bad message length - %d for %s",
peer->host, size,
type == 128 ? "ROUTE-REFRESH"
- : bgp_type_str[(int)type]);
+ : bgp_type_str[(int) type]);
+ }
+
+ uint16_t nsize = htons(size);
bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
BGP_NOTIFY_HEADER_BAD_MESLEN,
- (u_char *)&size, 2);
+ (unsigned char *) &nsize, 2);
return false;
}