]> git.proxmox.com Git - mirror_frr.git/blobdiff - bgpd/bgp_io.c
bgpd: enumerate support for dscp values
[mirror_frr.git] / bgpd / bgp_io.c
index c9662e2121f20053ac48dc4ca8276ea8fab553cb..c8d5b1daa121a3ed51817f242e0dc53fde9fc21e 100644 (file)
@@ -29,6 +29,7 @@
 #include "memory.h"            // for MTYPE_TMP, XCALLOC, XFREE
 #include "network.h"           // for ERRNO_IO_RETRY
 #include "stream.h"            // for stream_get_endp, stream_getw_from, str...
+#include "ringbuf.h"           // for ringbuf_remain, ringbuf_peek, ringbuf_...
 #include "thread.h"            // for THREAD_OFF, THREAD_ARG, thread, thread...
 #include "zassert.h"           // for assert
 
@@ -50,97 +51,12 @@ static bool validate_header(struct peer *);
 #define BGP_IO_TRANS_ERR (1 << 0) // EAGAIN or similar occurred
 #define BGP_IO_FATAL_ERR (1 << 1) // some kind of fatal TCP error
 
-/* Plumbing & control variables for thread lifecycle
- * ------------------------------------------------------------------------ */
-bool bgp_io_thread_run;
-pthread_mutex_t *running_cond_mtx;
-pthread_cond_t *running_cond;
-
-/* Unused callback for thread_add_read() */
-static int bgp_io_dummy(struct thread *thread) { return 0; }
-
-/* Poison pill task */
-static int bgp_io_finish(struct thread *thread)
-{
-       bgp_io_thread_run = false;
-       return 0;
-}
-
-/* Extern lifecycle control functions. init -> start -> stop
- * ------------------------------------------------------------------------ */
-void bgp_io_init()
-{
-       bgp_io_thread_run = false;
-
-       running_cond_mtx = XCALLOC(MTYPE_PTHREAD_PRIM, sizeof(pthread_mutex_t));
-       running_cond = XCALLOC(MTYPE_PTHREAD_PRIM, sizeof(pthread_cond_t));
-
-       pthread_mutex_init(running_cond_mtx, NULL);
-       pthread_cond_init(running_cond, NULL);
-
-       pthread_mutex_lock(running_cond_mtx);
-}
-
-void *bgp_io_start(void *arg)
-{
-       struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
-       fpt->master->owner = pthread_self();
-
-       // fd so we can sleep in poll()
-       int sleeper[2];
-       pipe(sleeper);
-       thread_add_read(fpt->master, &bgp_io_dummy, NULL, sleeper[0], NULL);
-
-       // we definitely don't want to handle signals
-       fpt->master->handle_signals = false;
-
-       struct thread task;
-
-       bgp_io_thread_run = true;
-       pthread_mutex_lock(running_cond_mtx);
-       {
-               pthread_cond_signal(running_cond);
-       }
-       pthread_mutex_unlock(running_cond_mtx);
-
-       while (bgp_io_thread_run) {
-               if (thread_fetch(fpt->master, &task)) {
-                       thread_call(&task);
-               }
-       }
-
-       close(sleeper[1]);
-       close(sleeper[0]);
-
-       return NULL;
-}
-
-void bgp_io_wait_running()
-{
-       while (!bgp_io_thread_run)
-               pthread_cond_wait(running_cond, running_cond_mtx);
-}
-
-int bgp_io_stop(void **result, struct frr_pthread *fpt)
-{
-       thread_add_event(fpt->master, &bgp_io_finish, NULL, 0, NULL);
-       pthread_join(fpt->thread, result);
-
-       pthread_mutex_unlock(running_cond_mtx);
-       pthread_mutex_destroy(running_cond_mtx);
-       pthread_cond_destroy(running_cond);
-
-       XFREE(MTYPE_PTHREAD_PRIM, running_cond_mtx);
-       XFREE(MTYPE_PTHREAD_PRIM, running_cond);
-
-       return 0;
-}
-
-/* Extern API -------------------------------------------------------------- */
+/* Thread external API ----------------------------------------------------- */
 
 void bgp_writes_on(struct peer *peer)
 {
-       assert(bgp_io_thread_run);
+       struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+       assert(fpt->running);
 
        assert(peer->status != Deleted);
        assert(peer->obuf);
@@ -150,8 +66,6 @@ void bgp_writes_on(struct peer *peer)
        assert(!peer->t_connect_check_w);
        assert(peer->fd);
 
-       struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
-
        thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
                         &peer->t_write);
        SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
@@ -159,9 +73,8 @@ void bgp_writes_on(struct peer *peer)
 
 void bgp_writes_off(struct peer *peer)
 {
-       assert(bgp_io_thread_run);
-
        struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+       assert(fpt->running);
 
        thread_cancel_async(fpt->master, &peer->t_write, NULL);
        THREAD_OFF(peer->t_generate_updgrp_packets);
@@ -171,7 +84,8 @@ void bgp_writes_off(struct peer *peer)
 
 void bgp_reads_on(struct peer *peer)
 {
-       assert(bgp_io_thread_run);
+       struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+       assert(fpt->running);
 
        assert(peer->status != Deleted);
        assert(peer->ibuf);
@@ -182,8 +96,6 @@ void bgp_reads_on(struct peer *peer)
        assert(!peer->t_connect_check_w);
        assert(peer->fd);
 
-       struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
-
        thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
                        &peer->t_read);
 
@@ -192,9 +104,8 @@ void bgp_reads_on(struct peer *peer)
 
 void bgp_reads_off(struct peer *peer)
 {
-       assert(bgp_io_thread_run);
-
        struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
+       assert(fpt->running);
 
        thread_cancel_async(fpt->master, &peer->t_read, NULL);
        THREAD_OFF(peer->t_process_packet);
@@ -202,9 +113,9 @@ void bgp_reads_off(struct peer *peer)
        UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
 }
 
-/* Internal functions ------------------------------------------------------- */
+/* Thread internal functions ----------------------------------------------- */
 
-/**
+/*
  * Called from I/O pthread when a file descriptor has become ready for writing.
  */
 static int bgp_process_writes(struct thread *thread)
@@ -227,11 +138,13 @@ static int bgp_process_writes(struct thread *thread)
        }
        pthread_mutex_unlock(&peer->io_mtx);
 
-       if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { /* no problem */
+       /* no problem */
+       if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
        }
 
+       /* problem */
        if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
-               reschedule = false; /* problem */
+               reschedule = false;
                fatal = true;
        }
 
@@ -246,7 +159,7 @@ static int bgp_process_writes(struct thread *thread)
        return 0;
 }
 
-/**
+/*
  * Called from I/O pthread when a file descriptor has become ready for reading,
  * or has hung up.
  *
@@ -261,12 +174,11 @@ static int bgp_process_reads(struct thread *thread)
        bool more = true;               // whether we got more data
        bool fatal = false;             // whether fatal error occurred
        bool added_pkt = false;         // whether we pushed onto ->ibuf
-       bool header_valid = true;       // whether header is valid
        /* clang-format on */
 
        peer = THREAD_ARG(thread);
 
-       if (peer->fd < 0)
+       if (peer->fd < 0 || bm->terminating)
                return -1;
 
        struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);
@@ -293,35 +205,35 @@ static int bgp_process_reads(struct thread *thread)
                /* static buffer for transferring packets */
                static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
                /* shorter alias to peer's input buffer */
-               struct stream *ibw = peer->ibuf_work;
-               /* offset of start of current packet */
-               size_t offset = stream_get_getp(ibw);
+               struct ringbuf *ibw = peer->ibuf_work;
                /* packet size as given by header */
-               u_int16_t pktsize = 0;
+               uint16_t pktsize = 0;
 
                /* check that we have enough data for a header */
-               if (STREAM_READABLE(ibw) < BGP_HEADER_SIZE)
+               if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
                        break;
 
-               /* validate header */
-               header_valid = validate_header(peer);
-
-               if (!header_valid) {
+               /* check that header is valid */
+               if (!validate_header(peer)) {
                        fatal = true;
                        break;
                }
 
                /* header is valid; retrieve packet size */
-               pktsize = stream_getw_from(ibw, offset + BGP_MARKER_SIZE);
+               ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
+
+               pktsize = ntohs(pktsize);
 
                /* if this fails we are seriously screwed */
                assert(pktsize <= BGP_MAX_PACKET_SIZE);
 
-               /* If we have that much data, chuck it into its own
-                * stream and append to input queue for processing. */
-               if (STREAM_READABLE(ibw) >= pktsize) {
+               /*
+                * If we have that much data, chuck it into its own
+                * stream and append to input queue for processing.
+                */
+               if (ringbuf_remain(ibw) >= pktsize) {
                        struct stream *pkt = stream_new(pktsize);
-                       stream_get(pktbuf, ibw, pktsize);
+                       assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
                        stream_put(pkt, pktbuf, pktsize);
 
                        pthread_mutex_lock(&peer->io_mtx);
@@ -335,28 +247,12 @@ static int bgp_process_reads(struct thread *thread)
                        break;
        }
 
-       /*
-        * After reading:
-        * 1. Move unread data to stream start to make room for more.
-        * 2. Reschedule and return when we have additional data.
-        *
-        * XXX: Heavy abuse of stream API. This needs a ring buffer.
-        */
-       if (more && STREAM_WRITEABLE(peer->ibuf_work) < BGP_MAX_PACKET_SIZE) {
-               void *from = stream_pnt(peer->ibuf_work);
-               void *to = peer->ibuf_work->data;
-               size_t siz = STREAM_READABLE(peer->ibuf_work);
-               memmove(to, from, siz);
-               stream_set_getp(peer->ibuf_work, 0);
-               stream_set_endp(peer->ibuf_work, siz);
-       }
-
-       assert(STREAM_WRITEABLE(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
+       assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
 
        /* handle invalid header */
        if (fatal) {
                /* wipe buffer just in case someone screwed up */
-               stream_reset(peer->ibuf_work);
+               ringbuf_wipe(peer->ibuf_work);
        } else {
                thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
                                &peer->t_read);
@@ -368,7 +264,7 @@ static int bgp_process_reads(struct thread *thread)
        return 0;
 }
 
-/**
+/*
  * Flush peer output buffer.
  *
  * This function pops packets off of peer->obuf and writes them to peer->fd.
@@ -382,22 +278,17 @@ static int bgp_process_reads(struct thread *thread)
  */
 static uint16_t bgp_write(struct peer *peer)
 {
-       u_char type;
+       uint8_t type;
        struct stream *s;
        int num;
        int update_last_write = 0;
        unsigned int count = 0;
-       uint32_t oc;
-       uint32_t uo;
+       uint32_t uo = 0;
        uint16_t status = 0;
        uint32_t wpkt_quanta_old;
 
-       // save current # updates sent
-       oc = atomic_load_explicit(&peer->update_out, memory_order_relaxed);
-
-       // cache current write quanta
-       wpkt_quanta_old =
-           atomic_load_explicit(&peer->bgp->wpkt_quanta, memory_order_relaxed);
+       wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
+                                              memory_order_relaxed);
 
        while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
                int writenum;
@@ -414,7 +305,7 @@ static uint16_t bgp_write(struct peer *peer)
                                }
 
                                goto done;
-                       } else if (num != writenum) // incomplete write
+                       } else if (num != writenum)
                                stream_forward_getp(s, num);
 
                } while (num != writenum);
@@ -431,6 +322,7 @@ static uint16_t bgp_write(struct peer *peer)
                case BGP_MSG_UPDATE:
                        atomic_fetch_add_explicit(&peer->update_out, 1,
                                                  memory_order_relaxed);
+                       uo++;
                        break;
                case BGP_MSG_NOTIFY:
                        atomic_fetch_add_explicit(&peer->notify_out, 1,
@@ -442,8 +334,10 @@ static uint16_t bgp_write(struct peer *peer)
                        if (peer->v_start >= (60 * 2))
                                peer->v_start = (60 * 2);
 
-                       /* Handle Graceful Restart case where the state changes
-                        * to Connect instead of Idle */
+                       /*
+                        * Handle Graceful Restart case where the state changes
+                        * to Connect instead of Idle.
+                        */
                        BGP_EVENT_ADD(peer, BGP_Stop);
                        goto done;
 
@@ -469,9 +363,12 @@ static uint16_t bgp_write(struct peer *peer)
        }
 
 done : {
-       /* Update last_update if UPDATEs were written. */
-       uo = atomic_load_explicit(&peer->update_out, memory_order_relaxed);
-       if (uo > oc)
+       /*
+        * Update last_update if UPDATEs were written.
+        * Note: that these are only updated at end,
+        *       not per message (i.e., per loop)
+        */
+       if (uo)
                atomic_store_explicit(&peer->last_update, bgp_clock(),
                                      memory_order_relaxed);
 
@@ -484,7 +381,7 @@ done : {
        return status;
 }
 
-/**
+/*
  * Reads a chunk of data from peer->fd into peer->ibuf_work.
  *
  * @return status flag (see top-of-file)
@@ -494,14 +391,16 @@ static uint16_t bgp_read(struct peer *peer)
        size_t readsize; // how many bytes we want to read
        ssize_t nbytes;  // how many bytes we actually read
        uint16_t status = 0;
+       static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
 
-       readsize = STREAM_WRITEABLE(peer->ibuf_work);
-
-       nbytes = stream_read_try(peer->ibuf_work, peer->fd, readsize);
+       readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
+       nbytes = read(peer->fd, ibw, readsize);
 
-       switch (nbytes) {
-       /* Fatal error; tear down session */
-       case -1:
+       /* EAGAIN or EWOULDBLOCK; come back later */
+       if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
+               SET_FLAG(status, BGP_IO_TRANS_ERR);
+               /* Fatal error; tear down session */
+       } else if (nbytes < 0) {
                zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
                         safe_strerror(errno));
 
@@ -515,10 +414,8 @@ static uint16_t bgp_read(struct peer *peer)
 
                BGP_EVENT_ADD(peer, TCP_fatal_error);
                SET_FLAG(status, BGP_IO_FATAL_ERR);
-               break;
-
-       /* Received EOF / TCP session closed */
-       case 0:
+               /* Received EOF / TCP session closed */
+       } else if (nbytes == 0) {
                if (bgp_debug_neighbor_events(peer))
                        zlog_debug("%s [Event] BGP connection closed fd %d",
                                   peer->host, peer->fd);
@@ -533,14 +430,9 @@ static uint16_t bgp_read(struct peer *peer)
 
                BGP_EVENT_ADD(peer, TCP_connection_closed);
                SET_FLAG(status, BGP_IO_FATAL_ERR);
-               break;
-
-       /* EAGAIN or EWOULDBLOCK; come back later */
-       case -2:
-               SET_FLAG(status, BGP_IO_TRANS_ERR);
-               break;
-       default:
-               break;
+       } else {
+               assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
+                      == (size_t)nbytes);
        }
 
        return status;
@@ -549,27 +441,35 @@ static uint16_t bgp_read(struct peer *peer)
 /*
  * Called after we have read a BGP packet header. Validates marker, message
  * type and packet length. If any of these aren't correct, sends a notify.
+ *
+ * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
+ * buffer.
  */
 static bool validate_header(struct peer *peer)
 {
        uint16_t size;
        uint8_t type;
-       struct stream *pkt = peer->ibuf_work;
-       size_t getp = stream_get_getp(pkt);
+       struct ringbuf *pkt = peer->ibuf_work;
+
+       static uint8_t m_correct[BGP_MARKER_SIZE] = {
+               0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+               0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+       uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
 
-       static uint8_t marker[BGP_MARKER_SIZE] = {
-           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+       if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
+               return false;
 
-       if (memcmp(marker, stream_pnt(pkt), BGP_MARKER_SIZE) != 0) {
+       if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
                bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
                                BGP_NOTIFY_HEADER_NOT_SYNC);
                return false;
        }
 
-       /* Get size and type in host byte order. */
-       size = stream_getw_from(pkt, getp + BGP_MARKER_SIZE);
-       type = stream_getc_from(pkt, getp + BGP_MARKER_SIZE + 2);
+       /* Get size and type in network byte order. */
+       ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
+       ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
+
+       size = ntohs(size);
 
        /* BGP type check. */
        if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
@@ -582,8 +482,8 @@ static bool validate_header(struct peer *peer)
                                   type);
 
                bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
-                                         BGP_NOTIFY_HEADER_BAD_MESTYPE,
-                                         &type, 1);
+                                         BGP_NOTIFY_HEADER_BAD_MESTYPE, &type,
+                                         1);
                return false;
        }
 
@@ -603,14 +503,14 @@ static bool validate_header(struct peer *peer)
                        zlog_debug("%s bad message length - %d for %s",
                                   peer->host, size,
                                   type == 128 ? "ROUTE-REFRESH"
-                                              : bgp_type_str[(int) type]);
+                                              : bgp_type_str[(int)type]);
                }
 
                uint16_t nsize = htons(size);
 
                bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
                                          BGP_NOTIFY_HEADER_BAD_MESLEN,
-                                         (unsigned char *) &nsize, 2);
+                                         (unsigned char *)&nsize, 2);
                return false;
        }