#include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE
#include "network.h" // for ERRNO_IO_RETRY
#include "stream.h" // for stream_get_endp, stream_getw_from, str...
+#include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_...
#include "thread.h" // for THREAD_OFF, THREAD_ARG, thread, thread...
#include "zassert.h" // for assert
#define BGP_IO_TRANS_ERR (1 << 0) // EAGAIN or similar occurred
#define BGP_IO_FATAL_ERR (1 << 1) // some kind of fatal TCP error
-/* Plumbing & control variables for thread lifecycle
- * ------------------------------------------------------------------------ */
-bool bgp_io_thread_run;
-pthread_mutex_t *running_cond_mtx;
-pthread_cond_t *running_cond;
-
-/* Unused callback for thread_add_read() */
-static int bgp_io_dummy(struct thread *thread) { return 0; }
-
-/* Poison pill task */
-static int bgp_io_finish(struct thread *thread)
-{
- bgp_io_thread_run = false;
- return 0;
-}
-
-/* Extern lifecycle control functions. init -> start -> stop
- * ------------------------------------------------------------------------ */
-void bgp_io_init()
-{
- bgp_io_thread_run = false;
-
- running_cond_mtx = XCALLOC(MTYPE_PTHREAD_PRIM, sizeof(pthread_mutex_t));
- running_cond = XCALLOC(MTYPE_PTHREAD_PRIM, sizeof(pthread_cond_t));
-
- pthread_mutex_init(running_cond_mtx, NULL);
- pthread_cond_init(running_cond, NULL);
-
- pthread_mutex_lock(running_cond_mtx);
-}
-
-void *bgp_io_start(void *arg)
-{
- struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
- fpt->master->owner = pthread_self();
-
- // fd so we can sleep in poll()
- int sleeper[2];
- pipe(sleeper);
- thread_add_read(fpt->master, &bgp_io_dummy, NULL, sleeper[0], NULL);
-
- // we definitely don't want to handle signals
- fpt->master->handle_signals = false;
-
- struct thread task;
-
- bgp_io_thread_run = true;
- pthread_mutex_lock(running_cond_mtx);
- {
- pthread_cond_signal(running_cond);
- }
- pthread_mutex_unlock(running_cond_mtx);
-
- while (bgp_io_thread_run) {
- if (thread_fetch(fpt->master, &task)) {
- thread_call(&task);
- }
- }
-
- close(sleeper[1]);
- close(sleeper[0]);
-
- return NULL;
-}
-
-void bgp_io_wait_running()
-{
- while (!bgp_io_thread_run)
- pthread_cond_wait(running_cond, running_cond_mtx);
-}
-
-int bgp_io_stop(void **result, struct frr_pthread *fpt)
-{
- thread_add_event(fpt->master, &bgp_io_finish, NULL, 0, NULL);
- pthread_join(fpt->thread, result);
-
- pthread_mutex_unlock(running_cond_mtx);
- pthread_mutex_destroy(running_cond_mtx);
- pthread_cond_destroy(running_cond);
-
- XFREE(MTYPE_PTHREAD_PRIM, running_cond_mtx);
- XFREE(MTYPE_PTHREAD_PRIM, running_cond);
-
- return 0;
-}
-
-/* Extern API -------------------------------------------------------------- */
+/* Thread external API ----------------------------------------------------- */
void bgp_writes_on(struct peer *peer)
{
- assert(bgp_io_thread_run);
+ struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+ assert(fpt->running);
assert(peer->status != Deleted);
assert(peer->obuf);
assert(!peer->t_connect_check_w);
assert(peer->fd);
- struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
-
thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
&peer->t_write);
SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
void bgp_writes_off(struct peer *peer)
{
- assert(bgp_io_thread_run);
-
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+ assert(fpt->running);
thread_cancel_async(fpt->master, &peer->t_write, NULL);
THREAD_OFF(peer->t_generate_updgrp_packets);
void bgp_reads_on(struct peer *peer)
{
- assert(bgp_io_thread_run);
+ struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+ assert(fpt->running);
assert(peer->status != Deleted);
assert(peer->ibuf);
assert(!peer->t_connect_check_w);
assert(peer->fd);
- struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
-
thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
&peer->t_read);
void bgp_reads_off(struct peer *peer)
{
- assert(bgp_io_thread_run);
-
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+ assert(fpt->running);
thread_cancel_async(fpt->master, &peer->t_read, NULL);
THREAD_OFF(peer->t_process_packet);
UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
}
-/* Internal functions ------------------------------------------------------- */
+/* Thread internal functions ----------------------------------------------- */
-/**
+/*
* Called from I/O pthread when a file descriptor has become ready for writing.
*/
static int bgp_process_writes(struct thread *thread)
}
pthread_mutex_unlock(&peer->io_mtx);
- if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { /* no problem */
+ /* no problem */
+ if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
}
+ /* problem */
if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
- reschedule = false; /* problem */
+ reschedule = false;
fatal = true;
}
return 0;
}
-/**
+/*
* Called from I/O pthread when a file descriptor has become ready for reading,
* or has hung up.
*
bool more = true; // whether we got more data
bool fatal = false; // whether fatal error occurred
bool added_pkt = false; // whether we pushed onto ->ibuf
- bool header_valid = true; // whether header is valid
/* clang-format on */
peer = THREAD_ARG(thread);
- if (peer->fd < 0)
+ if (peer->fd < 0 || bm->terminating)
return -1;
struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
/* static buffer for transferring packets */
static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
/* shorter alias to peer's input buffer */
- struct stream *ibw = peer->ibuf_work;
- /* offset of start of current packet */
- size_t offset = stream_get_getp(ibw);
+ struct ringbuf *ibw = peer->ibuf_work;
/* packet size as given by header */
- u_int16_t pktsize = 0;
+ uint16_t pktsize = 0;
/* check that we have enough data for a header */
- if (STREAM_READABLE(ibw) < BGP_HEADER_SIZE)
+ if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
break;
- /* validate header */
- header_valid = validate_header(peer);
-
- if (!header_valid) {
+ /* check that header is valid */
+ if (!validate_header(peer)) {
fatal = true;
break;
}
/* header is valid; retrieve packet size */
- pktsize = stream_getw_from(ibw, offset + BGP_MARKER_SIZE);
+ ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
+
+ pktsize = ntohs(pktsize);
/* if this fails we are seriously screwed */
assert(pktsize <= BGP_MAX_PACKET_SIZE);
- /* If we have that much data, chuck it into its own
- * stream and append to input queue for processing. */
- if (STREAM_READABLE(ibw) >= pktsize) {
+ /*
+ * If we have that much data, chuck it into its own
+ * stream and append to input queue for processing.
+ */
+ if (ringbuf_remain(ibw) >= pktsize) {
struct stream *pkt = stream_new(pktsize);
- stream_get(pktbuf, ibw, pktsize);
+ assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
stream_put(pkt, pktbuf, pktsize);
pthread_mutex_lock(&peer->io_mtx);
break;
}
- /*
- * After reading:
- * 1. Move unread data to stream start to make room for more.
- * 2. Reschedule and return when we have additional data.
- *
- * XXX: Heavy abuse of stream API. This needs a ring buffer.
- */
- if (more && STREAM_WRITEABLE(peer->ibuf_work) < BGP_MAX_PACKET_SIZE) {
- void *from = stream_pnt(peer->ibuf_work);
- void *to = peer->ibuf_work->data;
- size_t siz = STREAM_READABLE(peer->ibuf_work);
- memmove(to, from, siz);
- stream_set_getp(peer->ibuf_work, 0);
- stream_set_endp(peer->ibuf_work, siz);
- }
-
- assert(STREAM_WRITEABLE(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
+ assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
/* handle invalid header */
if (fatal) {
/* wipe buffer just in case someone screwed up */
- stream_reset(peer->ibuf_work);
+ ringbuf_wipe(peer->ibuf_work);
} else {
thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
&peer->t_read);
return 0;
}
-/**
+/*
* Flush peer output buffer.
*
* This function pops packets off of peer->obuf and writes them to peer->fd.
*/
static uint16_t bgp_write(struct peer *peer)
{
- u_char type;
+ uint8_t type;
struct stream *s;
int num;
int update_last_write = 0;
unsigned int count = 0;
- uint32_t oc;
- uint32_t uo;
+ uint32_t uo = 0;
uint16_t status = 0;
uint32_t wpkt_quanta_old;
- // save current # updates sent
- oc = atomic_load_explicit(&peer->update_out, memory_order_relaxed);
-
- // cache current write quanta
- wpkt_quanta_old =
- atomic_load_explicit(&peer->bgp->wpkt_quanta, memory_order_relaxed);
+ wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
+ memory_order_relaxed);
while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
int writenum;
}
goto done;
- } else if (num != writenum) // incomplete write
+ } else if (num != writenum)
stream_forward_getp(s, num);
} while (num != writenum);
case BGP_MSG_UPDATE:
atomic_fetch_add_explicit(&peer->update_out, 1,
memory_order_relaxed);
+ uo++;
break;
case BGP_MSG_NOTIFY:
atomic_fetch_add_explicit(&peer->notify_out, 1,
if (peer->v_start >= (60 * 2))
peer->v_start = (60 * 2);
- /* Handle Graceful Restart case where the state changes
- * to Connect instead of Idle */
+ /*
+ * Handle Graceful Restart case where the state changes
+ * to Connect instead of Idle.
+ */
BGP_EVENT_ADD(peer, BGP_Stop);
goto done;
}
done : {
- /* Update last_update if UPDATEs were written. */
- uo = atomic_load_explicit(&peer->update_out, memory_order_relaxed);
- if (uo > oc)
+ /*
+ * Update last_update if UPDATEs were written.
+ * Note: that these are only updated at end,
+ * not per message (i.e., per loop)
+ */
+ if (uo)
atomic_store_explicit(&peer->last_update, bgp_clock(),
memory_order_relaxed);
return status;
}
-/**
+/*
* Reads a chunk of data from peer->fd into peer->ibuf_work.
*
* @return status flag (see top-of-file)
size_t readsize; // how many bytes we want to read
ssize_t nbytes; // how many bytes we actually read
uint16_t status = 0;
+ static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
- readsize = STREAM_WRITEABLE(peer->ibuf_work);
-
- nbytes = stream_read_try(peer->ibuf_work, peer->fd, readsize);
+ readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
+ nbytes = read(peer->fd, ibw, readsize);
- switch (nbytes) {
- /* Fatal error; tear down session */
- case -1:
+ /* EAGAIN or EWOULDBLOCK; come back later */
+ if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
+ SET_FLAG(status, BGP_IO_TRANS_ERR);
+ /* Fatal error; tear down session */
+ } else if (nbytes < 0) {
zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
safe_strerror(errno));
BGP_EVENT_ADD(peer, TCP_fatal_error);
SET_FLAG(status, BGP_IO_FATAL_ERR);
- break;
-
- /* Received EOF / TCP session closed */
- case 0:
+ /* Received EOF / TCP session closed */
+ } else if (nbytes == 0) {
if (bgp_debug_neighbor_events(peer))
zlog_debug("%s [Event] BGP connection closed fd %d",
peer->host, peer->fd);
BGP_EVENT_ADD(peer, TCP_connection_closed);
SET_FLAG(status, BGP_IO_FATAL_ERR);
- break;
-
- /* EAGAIN or EWOULDBLOCK; come back later */
- case -2:
- SET_FLAG(status, BGP_IO_TRANS_ERR);
- break;
- default:
- break;
+ } else {
+ assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
+ == (size_t)nbytes);
}
return status;
/*
* Called after we have read a BGP packet header. Validates marker, message
* type and packet length. If any of these aren't correct, sends a notify.
+ *
+ * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
+ * buffer.
*/
static bool validate_header(struct peer *peer)
{
uint16_t size;
uint8_t type;
- struct stream *pkt = peer->ibuf_work;
- size_t getp = stream_get_getp(pkt);
+ struct ringbuf *pkt = peer->ibuf_work;
+
+ static uint8_t m_correct[BGP_MARKER_SIZE] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
- static uint8_t marker[BGP_MARKER_SIZE] = {
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
+ return false;
- if (memcmp(marker, stream_pnt(pkt), BGP_MARKER_SIZE) != 0) {
+ if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
BGP_NOTIFY_HEADER_NOT_SYNC);
return false;
}
- /* Get size and type in host byte order. */
- size = stream_getw_from(pkt, getp + BGP_MARKER_SIZE);
- type = stream_getc_from(pkt, getp + BGP_MARKER_SIZE + 2);
+ /* Get size and type in network byte order. */
+ ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
+ ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
+
+ size = ntohs(size);
/* BGP type check. */
if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
type);
bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
- BGP_NOTIFY_HEADER_BAD_MESTYPE,
- &type, 1);
+ BGP_NOTIFY_HEADER_BAD_MESTYPE, &type,
+ 1);
return false;
}
zlog_debug("%s bad message length - %d for %s",
peer->host, size,
type == 128 ? "ROUTE-REFRESH"
- : bgp_type_str[(int) type]);
+ : bgp_type_str[(int)type]);
}
uint16_t nsize = htons(size);
bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
BGP_NOTIFY_HEADER_BAD_MESLEN,
- (unsigned char *) &nsize, 2);
+ (unsigned char *)&nsize, 2);
return false;
}