]> git.proxmox.com Git - mirror_frr.git/commitdiff
bgpd: add support for l3vpn per-nexthop label
authorPhilippe Guibert <philippe.guibert@6wind.com>
Tue, 28 Feb 2023 13:25:02 +0000 (14:25 +0100)
committerPhilippe Guibert <philippe.guibert@6wind.com>
Tue, 9 May 2023 19:00:57 +0000 (21:00 +0200)
This commit introduces a new method to associate a label to
prefixes to export to a VPNv4 backbone. All the methods to
associate a label to a BGP update are documented in RFC 4364,
chapter 4.3.2. Initially, the "single label for an entire
VRF" method was available. This commit adds "single label
for each attachment circuit" method.

The change impacts the control-plane, because each BGP update
is checked to know if the nexthop has reachability in the VRF
or not. If this is the case, then a unique label for a given
destination IP in the VRF will be picked up. This label will
be reused for another BGP update that will have the same
nexthop IP address.

The change impacts the data-plane, because the MPLS pop
mechanism applied to incoming labelled packets changes: the
MPLS label is popped, and the packet is directly sent to the
connected nexthop described in the previous outgoing BGP VPN
update.

By default per-vrf mode is done, but the user may choose
the per-nexthop mode, by using the vty command from the
previous commit. In the latter case, a per-vrf label
will however be allocated to handle networks that are not directly
connected. This is the case for local traffic for instance.

The change also includes the following:

-  ECMP case
A route may be learnt in a given VRF and resolved via an
ECMP nexthop. This implies that when exporting the route as a BGP
update, if label allocation per nexthop is used, then two possible
MPLS values could be picked up, which is not possible with the
current implementation. Actually, the NLRI for VPNv4 stores one
prefix, and one single label value, not two. Today, RFC8277 with
multiple label capability is not yet available.
To avoid this corner case, when a route is resolved via more than one
nexthop, the label allocation per nexthop will not apply, and the
default per-vrf label will be chosen.
Let us imagine BGP redistributes a static route using the `172.31.0.20`
nexthop. The nexthop resolution will find two different nexthops for a
unique BGP update.

 > r1# show running-config
 > [..]
 > vrf vrf1
 >  ip route 172.31.0.30/32 172.31.0.20
 > r1# show bgp vrf vrf1 nexthop
 > [..]
 > 172.31.0.20 valid [IGP metric 0], #paths 1
 >  gate 192.0.2.11
 >  gate 192.0.2.12
 >  Last update: Mon Jan 16 09:27:09 2023
 >  Paths:
 >    1/1 172.31.0.30/32 VRF vrf1 flags 0x20018

To avoid this situation, BGP updates that resolve over multiple
nexthops are using the unique per-vrf label.

- recursive route case

Prefixes that need a recursive route to be resolved can
also be eligible for mpls allocation per nexthop. In that
case, the nexthop will be the recursive nexthop calculated.

To achieve this, all nexthop types in bnc contexts are valid,
except for the blackhole nexthops.

- network declared prefixes

Nexthop tracking is used to look for the reachability of the
prefixes. When the 'no bgp network import-check' command
is used, network declared prefixes are maintained active,
even if there is no active nexthop.

Signed-off-by: Philippe Guibert <philippe.guibert@6wind.com>
bgpd/bgp_mplsvpn.c
bgpd/bgp_mplsvpn.h
bgpd/bgp_nexthop.c
bgpd/bgp_nht.c
bgpd/bgp_route.h
bgpd/bgp_zebra.c
bgpd/bgp_zebra.h

index 6c68fe0b44bd531b5ccc413ed1866a56556a47e5..e9391acaf1e2fbf704e0b751e4d44e87f6a116c6 100644 (file)
@@ -1336,6 +1336,248 @@ leak_update(struct bgp *to_bgp, struct bgp_dest *bn,
        return new;
 }
 
+/* Detach a path from the per-nexthop label cache entry it references.
+ *
+ * Nothing is done when pi is NULL or is not attached to any entry.
+ * Otherwise pi is removed from the entry's path list, the entry's path
+ * counter is decremented and the back pointer stored in pi is cleared.
+ * The entry itself is freed once its path list is empty.
+ */
+void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi)
+{
+       struct bgp_label_per_nexthop_cache *entry;
+
+       if (!pi || !pi->label_nexthop_cache)
+               return;
+
+       entry = pi->label_nexthop_cache;
+
+       LIST_REMOVE(pi, label_nh_thread);
+       entry->path_count--;
+       pi->label_nexthop_cache = NULL;
+
+       /* last user gone: release the cache entry */
+       if (LIST_EMPTY(&(entry->paths)))
+               bgp_label_per_nexthop_free(entry);
+}
+
+/* Label pool callback registered through bgp_lp_get() for a per-nexthop
+ * label cache entry, which is handed back in 'context'.
+ *
+ * 'allocated' set means 'label' is now assigned to the entry. When it is
+ * not set, the previously assigned label is no longer usable (eg: zebra
+ * deallocated the labels and notifies it) and the entry is reset to
+ * MPLS_INVALID_LABEL.
+ *
+ * When the stored label actually changes, a valid new label is sent to
+ * zebra for the entry's nexthop, and every path attached to the entry is
+ * passed to vpn_leak_from_vrf_update() again. Always returns 0.
+ */
+static int bgp_mplsvpn_get_label_per_nexthop_cb(mpls_label_t label,
+                                               void *context, bool allocated)
+{
+       struct bgp_label_per_nexthop_cache *blnc = context;
+       int debug = BGP_DEBUG(vpn, VPN_LEAK_LABEL);
+       mpls_label_t new_label;
+       struct bgp_path_info *pi;
+       struct bgp_table *table;
+
+       if (debug)
+               zlog_debug("%s: label=%u, allocated=%d, nexthop=%pFX", __func__,
+                          label, allocated, &blnc->nexthop);
+
+       /* a label that is not (or no longer) allocated is stored as invalid */
+       new_label = allocated ? label : MPLS_INVALID_LABEL;
+       if (new_label == blnc->label)
+               return 0; /* no change */
+
+       /* update the entry with the new label */
+       blnc->label = new_label;
+
+       /* send the new label to zebra */
+       if (new_label != MPLS_INVALID_LABEL)
+               bgp_zebra_send_nexthop_label(ZEBRA_MPLS_LABELS_ADD, new_label,
+                                            ZEBRA_LSP_BGP, &blnc->nexthop);
+
+       /* update paths
+        * NOTE(review): in per-nexthop mode vpn_leak_from_vrf_update() can
+        * reach bgp_mplsvpn_path_nh_label_unlink(pi), which removes pi from
+        * this very list and may free blnc. Confirm that iterating with
+        * LIST_FOREACH is safe here.
+        */
+       LIST_FOREACH (pi, &(blnc->paths), label_nh_thread) {
+               table = pi->net ? bgp_dest_table(pi->net) : NULL;
+               if (table)
+                       vpn_leak_from_vrf_update(blnc->to_bgp, table->bgp, pi);
+       }
+
+       return 0;
+}
+
+/* Get the per-nexthop label to use for a path:
+ *  - build the nexthop prefix from the first nexthop of pi->nexthop
+ *  - find the matching label cache entry of from_bgp, or create a new
+ *    entry and request a label from zebra for it
+ *  - attach pi to that entry, detaching it from any previous one
+ *
+ * The caller must have checked that pi->nexthop and pi->nexthop->nexthop
+ * are set and that the nexthop is not a blackhole.
+ *
+ * Returns the label currently stored in the cache entry. For an entry that
+ * was just created this is expected to be MPLS_INVALID_LABEL until the
+ * label callback runs (the initial value is set by
+ * bgp_label_per_nexthop_new(), to be confirmed there).
+ * afi and safi are currently unused.
+ */
+static mpls_label_t _vpn_leak_from_vrf_get_per_nexthop_label(
+       struct bgp_path_info *pi, struct bgp *to_bgp, struct bgp *from_bgp,
+       afi_t afi, safi_t safi)
+{
+       struct bgp_nexthop_cache *bnc = pi->nexthop;
+       struct bgp_label_per_nexthop_cache *blnc;
+       struct bgp_label_per_nexthop_cache_head *tree;
+       struct prefix *nh_pfx = NULL;
+       struct prefix nh_gate = {0};
+
+       /* extract the nexthop from the BNC nexthop cache */
+       switch (bnc->nexthop->type) {
+       case NEXTHOP_TYPE_IPV4:
+       case NEXTHOP_TYPE_IPV4_IFINDEX:
+               /* the nexthop is recursive */
+               nh_gate.family = AF_INET;
+               nh_gate.prefixlen = IPV4_MAX_BITLEN;
+               IPV4_ADDR_COPY(&nh_gate.u.prefix4, &bnc->nexthop->gate.ipv4);
+               nh_pfx = &nh_gate;
+               break;
+       case NEXTHOP_TYPE_IPV6:
+       case NEXTHOP_TYPE_IPV6_IFINDEX:
+               /* the nexthop is recursive */
+               nh_gate.family = AF_INET6;
+               nh_gate.prefixlen = IPV6_MAX_BITLEN;
+               IPV6_ADDR_COPY(&nh_gate.u.prefix6, &bnc->nexthop->gate.ipv6);
+               nh_pfx = &nh_gate;
+               break;
+       case NEXTHOP_TYPE_IFINDEX:
+               /* the nexthop is directly connected */
+               nh_pfx = &bnc->prefix;
+               break;
+       case NEXTHOP_TYPE_BLACKHOLE:
+               assert(!"Blackhole nexthop. Already checked by the caller.");
+               break;
+       }
+
+       if (!nh_pfx) {
+               /* No usable nexthop address (unexpected nexthop type).
+                * Never dereference a NULL prefix below: behave as for an
+                * invalid nexthop instead.
+                */
+               bgp_mplsvpn_path_nh_label_unlink(pi);
+               return MPLS_INVALID_LABEL;
+       }
+
+       /* find or allocate a nexthop label cache entry */
+       tree = &from_bgp->mpls_labels_per_nexthop[family2afi(nh_pfx->family)];
+       blnc = bgp_label_per_nexthop_find(tree, nh_pfx);
+       if (!blnc) {
+               blnc = bgp_label_per_nexthop_new(tree, nh_pfx);
+               blnc->to_bgp = to_bgp;
+               /* request a label to zebra for this nexthop
+                * the response from zebra will trigger the callback
+                */
+               bgp_lp_get(LP_TYPE_NEXTHOP, blnc,
+                          bgp_mplsvpn_get_label_per_nexthop_cb);
+       }
+
+       if (pi->label_nexthop_cache == blnc)
+               /* no change */
+               return blnc->label;
+
+       /* Unlink from any existing nexthop cache. Free the entry if unused.
+        * blnc is a different entry here, so it is not affected.
+        */
+       bgp_mplsvpn_path_nh_label_unlink(pi);
+
+       /* updates NHT pi list reference */
+       LIST_INSERT_HEAD(&(blnc->paths), pi, label_nh_thread);
+       pi->label_nexthop_cache = blnc;
+       blnc->path_count++;
+
+       return blnc->label;
+}
+
+/* Select the label to advertise for a path exported in per-nexthop mode.
+ *
+ * Returns:
+ * - the per-VRF label (from_bgp->vpn_policy[afi].tovpn_label) when a
+ *   per-nexthop label can not be used: nexthop address of an invalid
+ *   format, unreachable "network" prefix kept valid by 'no bgp network
+ *   import-check', ECMP or blackhole resolution
+ * - MPLS_INVALID_LABEL when the nexthop is invalid
+ * - otherwise the per-nexthop label, found in the cache or requested
+ *
+ * In the first two cases the path is detached from any per-nexthop label
+ * cache entry it was linked to.
+ */
+static mpls_label_t vpn_leak_from_vrf_get_per_nexthop_label(
+       afi_t afi, safi_t safi, struct bgp_path_info *pi, struct bgp *from_bgp,
+       struct bgp *to_bgp)
+{
+       struct bgp_path_info *bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
+       struct bgp *bgp_nexthop;
+       bool is_bgp_static_route;
+       bool nh_valid;
+       afi_t nh_afi;
+
+       is_bgp_static_route = bpi_ultimate->type == ZEBRA_ROUTE_BGP &&
+                             bpi_ultimate->sub_type == BGP_ROUTE_STATIC;
+
+       /* A nexthop address whose format is not valid falls back to the
+        * per VRF label. BGP static routes skip these format checks.
+        */
+       if (!is_bgp_static_route && afi == AFI_IP) {
+               /* IPv4 nexthop in standard BGP encoding format:
+                * reject the any address and non unicast addresses.
+                */
+               if (CHECK_FLAG(pi->attr->flag,
+                              ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)) &&
+                   (pi->attr->nexthop.s_addr == INADDR_ANY ||
+                    !ipv4_unicast_valid(&pi->attr->nexthop)))
+                       goto per_vrf_label;
+
+               /* IPv4 nexthop in MP-BGP encoding format: same rules */
+               if (pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV4 &&
+                   (pi->attr->mp_nexthop_global_in.s_addr == INADDR_ANY ||
+                    !ipv4_unicast_valid(&pi->attr->mp_nexthop_global_in)))
+                       goto per_vrf_label;
+       } else if (!is_bgp_static_route && afi == AFI_IP6) {
+               /* IPv6 nexthop in MP-BGP encoding format: reject the
+                * unspecified, loopback and multicast addresses.
+                */
+               if ((pi->attr->mp_nexthop_len == BGP_ATTR_NHLEN_IPV6_GLOBAL ||
+                    pi->attr->mp_nexthop_len ==
+                            BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) &&
+                   (IN6_IS_ADDR_UNSPECIFIED(&pi->attr->mp_nexthop_global) ||
+                    IN6_IS_ADDR_LOOPBACK(&pi->attr->mp_nexthop_global) ||
+                    IN6_IS_ADDR_MULTICAST(&pi->attr->mp_nexthop_global)))
+                       goto per_vrf_label;
+       }
+
+       /* Check the next-hop reachability, using the bgp instance where
+        * the bgp_path_info originates when it is known.
+        */
+       bgp_nexthop = (pi->extra && pi->extra->bgp_orig) ? pi->extra->bgp_orig
+                                                        : from_bgp;
+       nh_afi = BGP_ATTR_NH_AFI(afi, pi->attr);
+       nh_valid = bgp_find_or_add_nexthop(from_bgp, bgp_nexthop, nh_afi, safi,
+                                          pi, NULL, 0, NULL);
+
+       /* "network" prefixes not routable, but since 'no bgp network
+        * import-check' is configured, they are always valid in the BGP
+        * table. Fallback to the per-vrf label
+        */
+       if (!nh_valid && is_bgp_static_route &&
+           !CHECK_FLAG(from_bgp->flags, BGP_FLAG_IMPORT_CHECK))
+               goto per_vrf_label;
+
+       if (!nh_valid || !pi->nexthop || pi->nexthop->nexthop_num == 0 ||
+           !pi->nexthop->nexthop) {
+               /* invalid next-hop:
+                * do not send the per-vrf label
+                * otherwise, when the next-hop becomes valid,
+                * we will have 2 BGP updates:
+                * - one with the per-vrf label
+                * - the second with the per-nexthop label
+                */
+               bgp_mplsvpn_path_nh_label_unlink(pi);
+               return MPLS_INVALID_LABEL;
+       }
+
+       /* Blackhole or ECMP routes are not compatible with a per-nexthop
+        * label. Fallback to per-vrf label.
+        */
+       if (pi->nexthop->nexthop_num > 1 ||
+           pi->nexthop->nexthop->type == NEXTHOP_TYPE_BLACKHOLE)
+               goto per_vrf_label;
+
+       return _vpn_leak_from_vrf_get_per_nexthop_label(pi, to_bgp, from_bgp,
+                                                       afi, safi);
+
+per_vrf_label:
+       /* common exit for every per-vrf fallback: drop any per-nexthop
+        * label cache link held by the path
+        */
+       bgp_mplsvpn_path_nh_label_unlink(pi);
+       return from_bgp->vpn_policy[afi].tovpn_label;
+}
+
 /* cf vnc_import_bgp_add_route_mode_nvegroup() and add_vnc_route() */
 void vpn_leak_from_vrf_update(struct bgp *to_bgp,           /* to */
                              struct bgp *from_bgp,        /* from */
@@ -1528,7 +1770,28 @@ void vpn_leak_from_vrf_update(struct bgp *to_bgp,             /* to */
                nexthop_self_flag = 1;
        }
 
-       label_val = from_bgp->vpn_policy[afi].tovpn_label;
+       if (CHECK_FLAG(from_bgp->vpn_policy[afi].flags,
+                      BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP))
+               /* per nexthop label mode */
+               label_val = vpn_leak_from_vrf_get_per_nexthop_label(
+                       afi, safi, path_vrf, from_bgp, to_bgp);
+       else
+               /* per VRF label mode */
+               label_val = from_bgp->vpn_policy[afi].tovpn_label;
+
+       if (label_val == MPLS_INVALID_LABEL &&
+           CHECK_FLAG(from_bgp->vpn_policy[afi].flags,
+                      BGP_VPN_POLICY_TOVPN_LABEL_PER_NEXTHOP)) {
+               /* no valid label for the moment
+                * when the 'bgp_mplsvpn_get_label_per_nexthop_cb' callback gets
+                * a valid label value, it will call the current function again.
+                */
+               if (debug)
+                       zlog_debug(
+                               "%s: %s skipping: waiting for a valid per-label nexthop.",
+                               __func__, from_bgp->name_pretty);
+               return;
+       }
        if (label_val == MPLS_LABEL_NONE)
                encode_label(MPLS_LABEL_IMPLICIT_NULL, &label);
        else
@@ -1769,6 +2032,8 @@ void vpn_leak_from_vrf_withdraw_all(struct bgp *to_bgp, struct bgp *from_bgp,
                                                bpi, afi, safi);
                                        bgp_path_info_delete(bn, bpi);
                                        bgp_process(to_bgp, bn, afi, safi);
+                                       bgp_mplsvpn_path_nh_label_unlink(
+                                               bpi->extra->parent);
                                }
                        }
                }
index c832b4abd444da426a2c101415637842f4f82873..75758edcc2bb4085786cd750352f58ccfcef30c5 100644 (file)
@@ -31,6 +31,7 @@
 #define BGP_PREFIX_SID_SRV6_MAX_FUNCTION_LENGTH 20
 
 extern void bgp_mplsvpn_init(void);
+extern void bgp_mplsvpn_path_nh_label_unlink(struct bgp_path_info *pi);
 extern int bgp_nlri_parse_vpn(struct peer *, struct attr *, struct bgp_nlri *);
 extern uint32_t decode_label(mpls_label_t *);
 extern void encode_label(mpls_label_t, mpls_label_t *);
index 1c79d7d03be34ce8bea84cd64517500c71ec3c60..c878512389d7daadef849ca27c82ab0ba8f92ffb 100644 (file)
@@ -31,6 +31,7 @@
 #include "bgpd/bgp_fsm.h"
 #include "bgpd/bgp_vty.h"
 #include "bgpd/bgp_rd.h"
+#include "bgpd/bgp_mplsvpn.h"
 
 DEFINE_MTYPE_STATIC(BGPD, MARTIAN_STRING, "BGP Martian Addr Intf String");
 
@@ -119,6 +120,8 @@ static void bgp_nexthop_cache_reset(struct bgp_nexthop_cache_head *tree)
                while (!LIST_EMPTY(&(bnc->paths))) {
                        struct bgp_path_info *path = LIST_FIRST(&(bnc->paths));
 
+                       bgp_mplsvpn_path_nh_label_unlink(path);
+
                        path_nh_map(path, bnc, false);
                }
 
index a294ebcc63062a5c956e248786adbe1ea7a8d94c..39aff8d500d9b070ce653c1a8b0e5ffb80ccd047 100644 (file)
@@ -31,6 +31,7 @@
 #include "bgpd/bgp_flowspec_util.h"
 #include "bgpd/bgp_evpn.h"
 #include "bgpd/bgp_rd.h"
+#include "bgpd/bgp_mplsvpn.h"
 
 extern struct zclient *zclient;
 
@@ -149,6 +150,8 @@ void bgp_unlink_nexthop(struct bgp_path_info *path)
 {
        struct bgp_nexthop_cache *bnc = path->nexthop;
 
+       bgp_mplsvpn_path_nh_label_unlink(path);
+
        if (!bnc)
                return;
 
@@ -1230,7 +1233,16 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
                        SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
 
                path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
-               if (path_valid != bnc_is_valid_nexthop) {
+               if (path->type == ZEBRA_ROUTE_BGP &&
+                   path->sub_type == BGP_ROUTE_STATIC &&
+                   !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
+                       /* static routes with 'no bgp network import-check' are
+                        * always valid. if nht is called with static routes,
+                        * the vpn exportation needs to be triggered
+                        */
+                       vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
+                                                path);
+               else if (path_valid != bnc_is_valid_nexthop) {
                        if (path_valid) {
                                /* No longer valid, clear flag; also for EVPN
                                 * routes, unimport from VRFs if needed.
@@ -1243,6 +1255,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
                                    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
                                        bgp_evpn_unimport_route(bgp_path,
                                                afi, safi, bgp_dest_get_prefix(dest), path);
+                               if (safi == SAFI_UNICAST &&
+                                   (bgp_path->inst_type !=
+                                    BGP_INSTANCE_TYPE_VIEW))
+                                       vpn_leak_from_vrf_withdraw(
+                                               bgp_get_default(), bgp_path,
+                                               path);
                        } else {
                                /* Path becomes valid, set flag; also for EVPN
                                 * routes, import from VRFs if needed.
@@ -1255,6 +1273,12 @@ void evaluate_paths(struct bgp_nexthop_cache *bnc)
                                    bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
                                        bgp_evpn_import_route(bgp_path,
                                                afi, safi, bgp_dest_get_prefix(dest), path);
+                               if (safi == SAFI_UNICAST &&
+                                   (bgp_path->inst_type !=
+                                    BGP_INSTANCE_TYPE_VIEW))
+                                       vpn_leak_from_vrf_update(
+                                               bgp_get_default(), bgp_path,
+                                               path);
                        }
                }
 
index a64144b62557fcb1197bd42814ad1ca6194e834e..fbdd5fae7d68a949a0c64d1a85788afd303062e9 100644 (file)
@@ -319,6 +319,12 @@ struct bgp_path_info {
        /* Addpath identifiers */
        uint32_t addpath_rx_id;
        struct bgp_addpath_info_data tx_addpath;
+
+       /* For nexthop per label linked list */
+       LIST_ENTRY(bgp_path_info) label_nh_thread;
+
+       /* Back pointer to the bgp label per nexthop structure */
+       struct bgp_label_per_nexthop_cache *label_nexthop_cache;
 };
 
 /* Structure used in BGP path selection */
index 96b1f3e00f4c1776e9f9028b240869b7643a2838..f0724f4eb1cb91d3cdccbff28b85f072a4d12948 100644 (file)
@@ -3911,3 +3911,26 @@ int bgp_zebra_srv6_manager_release_locator_chunk(const char *name)
 {
        return srv6_manager_release_locator_chunk(zclient, name);
 }
+
+/* Send an MPLS label message to zebra, associating a local label with a
+ * single gateway nexthop.
+ *
+ * cmd:   command handed to zebra_send_mpls_labels()
+ *        (eg: ZEBRA_MPLS_LABELS_ADD)
+ * label: local label
+ * ltype: LSP type
+ * p:     prefix holding the nexthop address. AF_INET is sent as an IPv4
+ *        nexthop, any other family as an IPv6 nexthop.
+ */
+void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label,
+                                 enum lsp_types_t ltype, struct prefix *p)
+{
+       struct zapi_labels zl = {};
+       struct zapi_nexthop *znh = &zl.nexthops[0];
+
+       zl.type = ltype;
+       zl.local_label = label;
+       zl.nexthop_num = 1;
+
+       /* gateway address and nexthop type follow the prefix family */
+       if (p->family == AF_INET) {
+               IPV4_ADDR_COPY(&znh->gate.ipv4, &p->u.prefix4);
+               znh->type = NEXTHOP_TYPE_IPV4;
+       } else {
+               IPV6_ADDR_COPY(&znh->gate.ipv6, &p->u.prefix6);
+               znh->type = NEXTHOP_TYPE_IPV6;
+       }
+       znh->ifindex = 0;
+       znh->label_num = 0;
+
+       /* vrf_id is DEFAULT_VRF */
+       zebra_send_mpls_labels(zclient, cmd, &zl);
+}
index b09be890e5eef7d09fb430f66594a8a2d3e88343..6a266e1a673416985365f8d8b5ad8d22cf61a2a4 100644 (file)
@@ -118,4 +118,7 @@ extern int bgp_zebra_update(struct bgp *bgp, afi_t afi, safi_t safi,
 extern int bgp_zebra_stale_timer_update(struct bgp *bgp);
 extern int bgp_zebra_srv6_manager_get_locator_chunk(const char *name);
 extern int bgp_zebra_srv6_manager_release_locator_chunk(const char *name);
+extern void bgp_zebra_send_nexthop_label(int cmd, mpls_label_t label,
+                                        enum lsp_types_t ltype,
+                                        struct prefix *p);
 #endif /* _QUAGGA_BGP_ZEBRA_H */