]> git.proxmox.com Git - mirror_frr.git/blobdiff - bgpd/bgp_packet.c
*: conform with COMMUNITY.md formatting rules, via 'make indent'
[mirror_frr.git] / bgpd / bgp_packet.c
index c243f4b0e30ba11605e3e7c0455d685986f762a0..cb702d80d1f1fd7f8afe97db90ece96fb56098ce 100644 (file)
@@ -58,6 +58,7 @@
 #include "bgpd/bgp_updgrp.h"
 #include "bgpd/bgp_label.h"
 #include "bgpd/bgp_io.h"
+#include "bgpd/bgp_keepalives.h"
 
 /**
  * Sets marker and type fields for a BGP message.
@@ -170,12 +171,12 @@ void bgp_check_update_delay(struct bgp *bgp)
 
        if (bgp->established
            <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
-               /* This is an extra sanity check to make sure we wait for all
-                  the
-                  eligible configured peers. This check is performed if
-                  establish wait
-                  timer is on, or establish wait option is not given with the
-                  update-delay command */
+               /*
+                * This is an extra sanity check to make sure we wait for all
+                * the eligible configured peers. This check is performed if
+                * establish wait timer is on, or establish wait option is not
+                * given with the update-delay command
+                */
                if (bgp->t_establish_wait
                    || (bgp->v_establish_wait == bgp->v_update_delay))
                        for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
@@ -200,8 +201,10 @@ void bgp_check_update_delay(struct bgp *bgp)
        }
 }
 
-/* Called if peer is known to have restarted. The restart-state bit in
-   Graceful-Restart capability is used for that */
+/*
+ * Called if peer is known to have restarted. The restart-state bit in
+ * Graceful-Restart capability is used for that
+ */
 void bgp_update_restarted_peers(struct peer *peer)
 {
        if (!bgp_update_delay_active(peer->bgp))
@@ -219,10 +222,12 @@ void bgp_update_restarted_peers(struct peer *peer)
        }
 }
 
-/* Called as peer receives a keep-alive. Determines if this occurence can be
-   taken as an implicit EOR for this peer.
-   NOTE: The very first keep-alive after the Established state of a peer is
-        considered implicit EOR for the update-delay purposes */
+/*
+ * Called as peer receives a keep-alive. Determines if this occurrence can be
+ * taken as an implicit EOR for this peer.
+ * NOTE: The very first keep-alive after the Established state of a peer is
+ * considered implicit EOR for the update-delay purposes
+ */
 void bgp_update_implicit_eors(struct peer *peer)
 {
        if (!bgp_update_delay_active(peer->bgp))
@@ -240,8 +245,10 @@ void bgp_update_implicit_eors(struct peer *peer)
        }
 }
 
-/* Should be called only when there is a change in the EOR_RECEIVED status
-   for any afi/safi on a peer */
+/*
+ * Should be called only when there is a change in the EOR_RECEIVED status
+ * for any afi/safi on a peer.
+ */
 static void bgp_update_explicit_eors(struct peer *peer)
 {
        afi_t afi;
@@ -300,10 +307,64 @@ int bgp_nlri_parse(struct peer *peer, struct attr *attr,
 }
 
 /*
- * Enqueue onto the peer's output buffer any packets which are pending for the
- * update group it is a member of.
+ * Checks a variety of conditions to determine whether the peer needs to be
+ * rescheduled for packet generation again, and does so if necessary.
  *
- * XXX: Severely needs performance work.
+ * @param peer to check for rescheduling
+ */
+static void bgp_write_proceed_actions(struct peer *peer)
+{
+       afi_t afi;
+       safi_t safi;
+       struct peer_af *paf;
+       struct bpacket *next_pkt;
+       struct update_subgroup *subgrp;
+
+       FOREACH_AFI_SAFI (afi, safi) {
+               paf = peer_af_find(peer, afi, safi);
+               if (!paf)
+                       continue;
+               subgrp = paf->subgroup;
+               if (!subgrp)
+                       continue;
+
+               next_pkt = paf->next_pkt_to_send;
+               if (next_pkt && next_pkt->buffer) {
+                       BGP_TIMER_ON(peer->t_generate_updgrp_packets,
+                                    bgp_generate_updgrp_packets, 0);
+                       return;
+               }
+
+               /* Nothing already queued for this AFI/SAFI. Reschedule (and
+                * stop scanning) if the subgroup's packet queue is full or it
+                * still has packets to build. */
+               if (bpacket_queue_is_full(SUBGRP_INST(subgrp),
+                                         SUBGRP_PKTQ(subgrp))
+                   || subgroup_packets_to_build(subgrp)) {
+                       BGP_TIMER_ON(peer->t_generate_updgrp_packets,
+                                    bgp_generate_updgrp_packets, 0);
+                       return;
+               }
+
+               /* Nothing to send or build; see if an EOR is still pending */
+               if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) {
+                       if (!subgrp->t_coalesce && peer->afc_nego[afi][safi]
+                           && peer->synctime
+                           && !CHECK_FLAG(peer->af_sflags[afi][safi],
+                                          PEER_STATUS_EOR_SEND)
+                           && safi != SAFI_MPLS_VPN) {
+                               BGP_TIMER_ON(peer->t_generate_updgrp_packets,
+                                            bgp_generate_updgrp_packets, 0);
+                               return;
+                       }
+               }
+       }
+}
+
+/*
+ * Generate advertisement information (withdraws, updates, EOR) from each
+ * update group a peer belongs to, encode this information into packets, and
+ * enqueue the packets onto the peer's output buffer.
  */
 int bgp_generate_updgrp_packets(struct thread *thread)
 {
@@ -312,9 +373,14 @@ int bgp_generate_updgrp_packets(struct thread *thread)
        struct stream *s;
        struct peer_af *paf;
        struct bpacket *next_pkt;
+       uint32_t wpq;
+       uint32_t generated = 0;
        afi_t afi;
        safi_t safi;
 
+       wpq = atomic_load_explicit(&peer->bgp->wpkt_quanta,
+                                  memory_order_relaxed);
+
        /*
         * The code beyond this part deals with update packets, proceed only
         * if peer is Established and updates are not on hold (as part of
@@ -328,79 +394,71 @@ int bgp_generate_updgrp_packets(struct thread *thread)
 
        do {
                s = NULL;
-               for (afi = AFI_IP; afi < AFI_MAX; afi++)
-                       for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) {
-                               paf = peer_af_find(peer, afi, safi);
-                               if (!paf || !PAF_SUBGRP(paf))
-                                       continue;
+               FOREACH_AFI_SAFI (afi, safi) {
+                       paf = peer_af_find(peer, afi, safi);
+                       if (!paf || !PAF_SUBGRP(paf))
+                               continue;
+                       next_pkt = paf->next_pkt_to_send;
+
+                       /*
+                        * Try to generate a packet for the peer if we are at
+                        * the end of the list. Always try to push out
+                        * WITHDRAWs first.
+                        */
+                       if (!next_pkt || !next_pkt->buffer) {
+                               next_pkt = subgroup_withdraw_packet(
+                                       PAF_SUBGRP(paf));
+                               if (!next_pkt || !next_pkt->buffer)
+                                       subgroup_update_packet(PAF_SUBGRP(paf));
                                next_pkt = paf->next_pkt_to_send;
+                       }
 
-                               /* Try to generate a packet for the peer if we
-                                * are at the end of
-                                * the list. Always try to push out WITHDRAWs
-                                * first. */
-                               if (!next_pkt || !next_pkt->buffer) {
-                                       next_pkt = subgroup_withdraw_packet(
-                                               PAF_SUBGRP(paf));
-                                       if (!next_pkt || !next_pkt->buffer)
-                                               subgroup_update_packet(
-                                                       PAF_SUBGRP(paf));
-                                       next_pkt = paf->next_pkt_to_send;
-                               }
-
-                               /* If we still don't have a packet to send to
-                                * the peer, then
-                                * try to find out out if we have to send eor or
-                                * if not, skip to
-                                * the next AFI, SAFI.
-                                * Don't send the EOR prematurely... if the
-                                * subgroup's coalesce
-                                * timer is running, the adjacency-out structure
-                                * is not created
-                                * yet.
-                                */
-                               if (!next_pkt || !next_pkt->buffer) {
-                                       if (CHECK_FLAG(peer->cap,
-                                                      PEER_CAP_RESTART_RCV)) {
-                                               if (!(PAF_SUBGRP(paf))
-                                                            ->t_coalesce
-                                                   && peer->afc_nego[afi][safi]
-                                                   && peer->synctime
-                                                   && !CHECK_FLAG(
-                                                              peer->af_sflags
-                                                                      [afi]
-                                                                      [safi],
-                                                              PEER_STATUS_EOR_SEND)) {
-                                                       SET_FLAG(
-                                                               peer->af_sflags
-                                                                       [afi]
+                       /*
+                        * If we still don't have a packet to send to the peer,
+                        * then try to find out if we have to send eor or
+                        * if not, skip to the next AFI, SAFI. Don't send the
+                        * EOR prematurely; if the subgroup's coalesce timer is
+                        * running, the adjacency-out structure is not created
+                        * yet.
+                        */
+                       if (!next_pkt || !next_pkt->buffer) {
+                               if (CHECK_FLAG(peer->cap,
+                                              PEER_CAP_RESTART_RCV)) {
+                                       if (!(PAF_SUBGRP(paf))->t_coalesce
+                                           && peer->afc_nego[afi][safi]
+                                           && peer->synctime
+                                           && !CHECK_FLAG(
+                                                      peer->af_sflags[afi]
+                                                                     [safi],
+                                                      PEER_STATUS_EOR_SEND)) {
+                                               SET_FLAG(peer->af_sflags[afi]
                                                                        [safi],
-                                                               PEER_STATUS_EOR_SEND);
-
-                                                       if ((s = bgp_update_packet_eor(
-                                                                    peer, afi,
-                                                                    safi))) {
-                                                               bgp_packet_add(
-                                                                       peer,
-                                                                       s);
-                                                               bgp_writes_on(
-                                                                       peer);
-                                                       }
+                                                        PEER_STATUS_EOR_SEND);
+
+                                               if ((s = bgp_update_packet_eor(
+                                                            peer, afi,
+                                                            safi))) {
+                                                       bgp_packet_add(peer, s);
                                                }
                                        }
-                                       continue;
                                }
+                               continue;
+                       }
 
 
-                               /* Found a packet template to send, overwrite
-                                * packet with appropriate
-                                * attributes from peer and advance peer */
-                               s = bpacket_reformat_for_peer(next_pkt, paf);
-                               bgp_packet_add(peer, s);
-                               bgp_writes_on(peer);
-                               bpacket_queue_advance_peer(paf);
-                       }
-       } while (s);
+                       /* Found a packet template to send, overwrite
+                        * packet with appropriate attributes from peer
+                        * and advance peer */
+                       s = bpacket_reformat_for_peer(next_pkt, paf);
+                       bgp_packet_add(peer, s);
+                       bpacket_queue_advance_peer(paf);
+               }
+       } while (s && (++generated < wpq));
+
+       if (generated)
+               bgp_writes_on(peer);
+
+       bgp_write_proceed_actions(peer);
 
        return 0;
 }
@@ -487,32 +545,45 @@ void bgp_open_send(struct peer *peer)
        bgp_writes_on(peer);
 }
 
-/* This is only for sending NOTIFICATION message to neighbor. */
+/*
+ * Writes NOTIFICATION message directly to a peer socket without waiting for
+ * the I/O thread.
+ *
+ * There must be exactly one stream on the peer->obuf FIFO, and the data within
+ * this stream must match the format of a BGP NOTIFICATION message.
+ * Transmission is best-effort.
+ *
+ * @requires peer->io_mtx
+ * @param peer
+ * @return 0
+ */
 static int bgp_write_notify(struct peer *peer)
 {
        int ret, val;
        u_char type;
        struct stream *s;
 
-       pthread_mutex_lock(&peer->io_mtx);
-       {
-               /* There should be at least one packet. */
-               s = stream_fifo_pop(peer->obuf);
-               if (!s)
-                       return 0;
-               assert(stream_get_endp(s) >= BGP_HEADER_SIZE);
-       }
-       pthread_mutex_unlock(&peer->io_mtx);
+       /* There should be at least one packet. */
+       s = stream_fifo_pop(peer->obuf);
+
+       if (!s)
+               return 0;
+
+       assert(stream_get_endp(s) >= BGP_HEADER_SIZE);
 
        /* Stop collecting data within the socket */
        sockopt_cork(peer->fd, 0);
 
-       /* socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
-        * we only care about getting a clean shutdown at this point. */
+       /*
+        * socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
+        * we only care about getting a clean shutdown at this point.
+        */
        ret = write(peer->fd, STREAM_DATA(s), stream_get_endp(s));
 
-       /* only connection reset/close gets counted as TCP_fatal_error, failure
-        * to write the entire NOTIFY doesn't get different FSM treatment */
+       /*
+        * only connection reset/close gets counted as TCP_fatal_error, failure
+        * to write the entire NOTIFY doesn't get different FSM treatment
+        */
        if (ret <= 0) {
                stream_free(s);
                BGP_EVENT_ADD(peer, TCP_fatal_error);
@@ -531,6 +602,7 @@ static int bgp_write_notify(struct peer *peer)
        assert(type == BGP_MSG_NOTIFY);
 
        /* Type should be notify. */
+       atomic_fetch_add_explicit(&peer->notify_out, 1, memory_order_relaxed);
        peer->notify_out++;
 
        /* Double start timer. */
@@ -540,8 +612,10 @@ static int bgp_write_notify(struct peer *peer)
        if (peer->v_start >= (60 * 2))
                peer->v_start = (60 * 2);
 
-       /* Handle Graceful Restart case where the state changes to
-          Connect instead of Idle */
+       /*
+        * Handle Graceful Restart case where the state changes to
+        * Connect instead of Idle
+        */
        BGP_EVENT_ADD(peer, BGP_Stop);
 
        stream_free(s);
@@ -552,8 +626,16 @@ static int bgp_write_notify(struct peer *peer)
 /*
  * Creates a BGP Notify and appends it to the peer's output queue.
  *
- * This function awakens the write thread to ensure the packet
- * gets out ASAP.
+ * This function attempts to write the packet from the thread it is called
+ * from, to ensure the packet gets out ASAP.
+ *
+ * This function may be called from multiple threads. Since the function
+ * modifies I/O buffer(s) in the peer, these are locked for the duration of the
+ * call to prevent tampering from other threads.
+ *
+ * Delivery of the NOTIFICATION is attempted once and is best-effort. After
+ * return, the peer structure *must* be reset; no assumptions about session
+ * state are valid.
  *
  * @param peer
  * @param code      BGP error code
@@ -567,6 +649,10 @@ void bgp_notify_send_with_data(struct peer *peer, u_char code, u_char sub_code,
        struct stream *s;
        int length;
 
+       /* Lock I/O mutex to prevent other threads from pushing packets */
+       pthread_mutex_lock(&peer->io_mtx);
+       /* ============================================== */
+
        /* Allocate new stream. */
        s = stream_new(BGP_MAX_PACKET_SIZE);
 
@@ -585,17 +671,13 @@ void bgp_notify_send_with_data(struct peer *peer, u_char code, u_char sub_code,
        length = bgp_packet_set_size(s);
 
        /* wipe output buffer */
-       pthread_mutex_lock(&peer->io_mtx);
-       {
-               stream_fifo_clean(peer->obuf);
-       }
-       pthread_mutex_unlock(&peer->io_mtx);
+       stream_fifo_clean(peer->obuf);
 
-       /* If possible, store last packet for debugging purposes. This check is
-        * in
-        * place because we are sometimes called with a doppelganger peer, who
-        * tends
-        * to have a plethora of fields nulled out. */
+       /*
+        * If possible, store last packet for debugging purposes. This check is
+        * in place because we are sometimes called with a doppelganger peer,
+        * who tends to have a plethora of fields nulled out.
+        */
        if (peer->curr && peer->last_reset_cause_size) {
                size_t packetsize = stream_get_endp(peer->curr);
                assert(packetsize <= peer->last_reset_cause_size);
@@ -653,16 +735,19 @@ void bgp_notify_send_with_data(struct peer *peer, u_char code, u_char sub_code,
                peer->last_reset = PEER_DOWN_NOTIFY_SEND;
 
        /* Add packet to peer's output queue */
-       bgp_packet_add(peer, s);
+       stream_fifo_push(peer->obuf, s);
 
        bgp_write_notify(peer);
+
+       /* ============================================== */
+       pthread_mutex_unlock(&peer->io_mtx);
 }
 
 /*
  * Creates a BGP Notify and appends it to the peer's output queue.
  *
- * This function awakens the write thread to ensure the packet
- * gets out ASAP.
+ * This function attempts to write the packet from the thread it is called
+ * from, to ensure the packet gets out ASAP.
  *
  * @param peer
  * @param code      BGP error code
@@ -1488,9 +1573,8 @@ static int bgp_update_receive(struct peer *peer, bgp_size_t size)
         * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
         * and MP EoR should have only an empty MP_UNREACH
         */
-       if ((!update_len && !withdraw_len &&
-            nlris[NLRI_MP_UPDATE].length == 0) ||
-           (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
+       if ((!update_len && !withdraw_len && nlris[NLRI_MP_UPDATE].length == 0)
+           || (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
                afi_t afi = 0;
                safi_t safi;
 
@@ -1608,7 +1692,7 @@ static int bgp_notify_receive(struct peer *peer, bgp_size_t size)
        }
 
        /* peer count update */
-       peer->notify_in++;
+       atomic_fetch_add_explicit(&peer->notify_in, 1, memory_order_relaxed);
 
        peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
 
@@ -2078,22 +2162,7 @@ int bgp_process_packet(struct thread *thread)
        peer = THREAD_ARG(thread);
        rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
                                               memory_order_relaxed);
-
-       /*
-        * XXX: At present multiple packet reads per input cycle are
-        * problematic. The issue is that some of the packet processing
-        * functions perform their own FSM checks, that arguably should be
-        * located in bgp_fsm.c. For example if we are in OpenConfirm process a
-        * Keepalive, then a keepalive-received event is placed on the event
-        * queue to handle later. If we then process an Update before that
-        * event has popped, the update function checks that the peer status is
-        * in Established and if not tears down the session. Therefore we'll
-        * limit input processing to 1 packet per cycle, as it traditionally
-        * was, until this problem is rectified.
-        *
-        * @qlyoung June 2017
-        */
-       rpkt_quanta_old = 1;
+       fsm_update_result = 0;
 
        /* Guard against scheduled events that occur after peer deletion. */
        if (peer->status == Deleted || peer->status == Clearing)
@@ -2133,7 +2202,8 @@ int bgp_process_packet(struct thread *thread)
                 */
                switch (type) {
                case BGP_MSG_OPEN:
-                       peer->open_in++;
+                       atomic_fetch_add_explicit(&peer->open_in, 1,
+                                                 memory_order_relaxed);
                        mprc = bgp_open_receive(peer, size);
                        if (mprc == BGP_Stop)
                                zlog_err(
@@ -2141,7 +2211,8 @@ int bgp_process_packet(struct thread *thread)
                                        __FUNCTION__, peer->host);
                        break;
                case BGP_MSG_UPDATE:
-                       peer->update_in++;
+                       atomic_fetch_add_explicit(&peer->update_in, 1,
+                                                 memory_order_relaxed);
                        peer->readtime = monotime(NULL);
                        mprc = bgp_update_receive(peer, size);
                        if (mprc == BGP_Stop)
@@ -2150,7 +2221,8 @@ int bgp_process_packet(struct thread *thread)
                                        __FUNCTION__, peer->host);
                        break;
                case BGP_MSG_NOTIFY:
-                       peer->notify_in++;
+                       atomic_fetch_add_explicit(&peer->notify_in, 1,
+                                                 memory_order_relaxed);
                        mprc = bgp_notify_receive(peer, size);
                        if (mprc == BGP_Stop)
                                zlog_err(
@@ -2159,7 +2231,8 @@ int bgp_process_packet(struct thread *thread)
                        break;
                case BGP_MSG_KEEPALIVE:
                        peer->readtime = monotime(NULL);
-                       peer->keepalive_in++;
+                       atomic_fetch_add_explicit(&peer->keepalive_in, 1,
+                                                 memory_order_relaxed);
                        mprc = bgp_keepalive_receive(peer, size);
                        if (mprc == BGP_Stop)
                                zlog_err(
@@ -2168,7 +2241,8 @@ int bgp_process_packet(struct thread *thread)
                        break;
                case BGP_MSG_ROUTE_REFRESH_NEW:
                case BGP_MSG_ROUTE_REFRESH_OLD:
-                       peer->refresh_in++;
+                       atomic_fetch_add_explicit(&peer->refresh_in, 1,
+                                                 memory_order_relaxed);
                        mprc = bgp_route_refresh_receive(peer, size);
                        if (mprc == BGP_Stop)
                                zlog_err(
@@ -2176,13 +2250,22 @@ int bgp_process_packet(struct thread *thread)
                                        __FUNCTION__, peer->host);
                        break;
                case BGP_MSG_CAPABILITY:
-                       peer->dynamic_cap_in++;
+                       atomic_fetch_add_explicit(&peer->dynamic_cap_in, 1,
+                                                 memory_order_relaxed);
                        mprc = bgp_capability_receive(peer, size);
                        if (mprc == BGP_Stop)
                                zlog_err(
                                        "%s: BGP CAPABILITY receipt failed for peer: %s",
                                        __FUNCTION__, peer->host);
                        break;
+               default:
+                       /*
+                        * The message type should have been sanitized before
+                        * we ever got here. Receipt of a message with an
+                        * invalid header at this point is indicative of a
+                        * security issue.
+                        */
+                       assert (!"Message of invalid type received during input processing");
                }
 
                /* delete processed packet */
@@ -2193,10 +2276,13 @@ int bgp_process_packet(struct thread *thread)
                /* Update FSM */
                if (mprc != BGP_PACKET_NOOP)
                        fsm_update_result = bgp_event_update(peer, mprc);
+               else
+                       continue;
 
-               /* If peer was deleted, do not process any more packets. This is
-                * usually
-                * due to executing BGP_Stop or a stub deletion. */
+               /*
+                * If peer was deleted, do not process any more packets. This
+                * is usually due to executing BGP_Stop or a stub deletion.
+                */
                if (fsm_update_result == FSM_PEER_TRANSFERRED
                    || fsm_update_result == FSM_PEER_STOPPED)
                        break;
@@ -2206,10 +2292,11 @@ int bgp_process_packet(struct thread *thread)
            && fsm_update_result != FSM_PEER_STOPPED) {
                pthread_mutex_lock(&peer->io_mtx);
                {
-                       if (peer->ibuf->count
-                           > 0) // more work to do, come back later
-                               thread_add_event(bm->master, bgp_process_packet,
-                                                peer, 0, NULL);
+                       // more work to do, come back later
+                       if (peer->ibuf->count > 0)
+                               thread_add_timer_msec(
+                                       bm->master, bgp_process_packet, peer, 0,
+                                       &peer->t_process_packet);
                }
                pthread_mutex_unlock(&peer->io_mtx);
        }