]> git.proxmox.com Git - mirror_frr.git/commitdiff
bgpd: support for DF election in EVPN-MH
authorAnuradha Karuppiah <anuradhak@cumulusnetworks.com>
Fri, 8 May 2020 23:35:09 +0000 (16:35 -0700)
committerAnuradha Karuppiah <anuradhak@cumulusnetworks.com>
Mon, 26 Oct 2020 17:26:21 +0000 (10:26 -0700)
DF (Designated Forwarder) election is used for picking a single
BUM-traffic forwarder per-ES. RFC7432 specifies a mechanism called
service carving for DF election. However that mechanism has many
disadvantages -
1. Load-balances poorly.
2. Doesn't allow for a controlled failover needed in upgrade
scenarios.
3. Not easy to hw accelerate.

To fix the poor performance of service carving alternate DF mechanisms
have been proposed via the following drafts -
draft-ietf-bess-evpn-df-election-framework
draft-ietf-bess-evpn-pref-df

This commit adds support for the pref-df election mechanism which
is used as the default. Other mechanisms including service-carving
may be added later.

In this mechanism one switch on an ES is elected as DF based on the
preference value; higher preference wins with IP address acting
as the tie-breaker (lower-IP wins if pref value is the same).

Sample output
=============
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
torm-11# sh bgp l2vpn evpn es 03:00:00:00:00:01:11:00:00:01
ESI: 03:00:00:00:00:01:11:00:00:01
 Type: LR
 RD: 27.0.0.15:6
 Originator-IP: 27.0.0.15
 Local ES DF preference: 100
 VNI Count: 10
 Remote VNI Count: 10
 Inconsistent VNI VTEP Count: 0
 Inconsistencies: -
 VTEPs:
  27.0.0.16 flags: EA df_alg: preference df_pref: 32767
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
torm-11# sh bgp l2vpn evpn route esi 03:00:00:00:00:01:11:00:00:01
*> [4]:[03:00:00:00:00:01:11:00:00:01]:[32]:[27.0.0.15]
                    27.0.0.15                          32768 i
                    ET:8 ES-Import-Rt:00:00:00:00:01:11 DF: (alg: 2, pref: 100)
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

Signed-off-by: Anuradha Karuppiah <anuradhak@cumulusnetworks.com>
16 files changed:
bgpd/bgp_attr.c
bgpd/bgp_attr.h
bgpd/bgp_attr_evpn.c
bgpd/bgp_attr_evpn.h
bgpd/bgp_ecommunity.c
bgpd/bgp_ecommunity.h
bgpd/bgp_evpn_mh.c
bgpd/bgp_evpn_mh.h
bgpd/bgp_evpn_private.h
bgpd/bgp_evpn_vty.c
bgpd/bgp_zebra.c
lib/prefix.c
lib/prefix.h
lib/stream.h
lib/vxlan.h
lib/zclient.h

index 429a68d190e4e130a94bdf57fe3b5af641e6035e..b94e24e870b2029146c9c80beb5a93658237224b 100644 (file)
@@ -725,6 +725,8 @@ bool attrhash_cmp(const void *p1, const void *p2)
                    && !memcmp(&attr1->esi, &attr2->esi, sizeof(esi_t))
                    && attr1->es_flags == attr2->es_flags
                    && attr1->mm_sync_seqnum == attr2->mm_sync_seqnum
+                   && attr1->df_pref == attr2->df_pref
+                   && attr1->df_alg == attr2->df_alg
                    && attr1->nh_ifindex == attr2->nh_ifindex
                    && attr1->nh_lla_ifindex == attr2->nh_lla_ifindex
                    && attr1->distance == attr2->distance
@@ -2247,6 +2249,9 @@ bgp_attr_ext_communities(struct bgp_attr_parser_args *args)
 
        attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
 
+       /* Extract DF election preference and algorithm, if any. */
+       attr->df_pref = bgp_attr_df_pref_from_ec(attr, &attr->df_alg);
+
        /* Extract MAC mobility sequence number, if any. */
        attr->mm_seqnum = bgp_attr_mac_mobility_seqnum(attr, &sticky);
        attr->sticky = sticky;
index e6e953364bebe5d0c0d199e55054622b601accd1..ef0e74344a968be7dc0adaa2ae4d55a58b862597 100644 (file)
@@ -294,6 +294,10 @@ struct attr {
 
        /* SR-TE Color */
        uint32_t srte_color;
+
+       /* EVPN DF preference and algorithm for DF election on local ESs */
+       uint16_t df_pref;
+       uint8_t df_alg;
 };
 
 /* rmap_change_flags definition */
index aa0c59f3a7a57942a6f6c47c15e1d185f27408bb..7cc9ecd79edfc4a03bccda858e9e1bb84693bd1a 100644 (file)
@@ -26,6 +26,7 @@
 #include "log.h"
 #include "memory.h"
 #include "stream.h"
+#include "vxlan.h"
 
 #include "bgpd/bgpd.h"
 #include "bgpd/bgp_attr.h"
@@ -145,6 +146,43 @@ uint8_t bgp_attr_default_gw(struct attr *attr)
        return 0;
 }
 
+/*
+ * Look up the EVPN DF election extended community on the attribute.
+ *
+ * On a match, *alg receives the algorithm byte masked with
+ * ECOMMUNITY_EVPN_SUBTYPE_DF_ALG_BITS and the 16-bit preference found in
+ * the last two bytes of the community is returned. Only the first
+ * matching community is used.
+ *
+ * When there is no extended community, or none of the DF election
+ * sub-type, *alg is EVPN_MH_DF_ALG_SERVICE_CARVING and 0 is returned.
+ */
+uint16_t bgp_attr_df_pref_from_ec(struct attr *attr, uint8_t *alg)
+{
+       struct ecommunity *ecom = attr->ecommunity;
+       uint16_t df_pref = 0;
+       int idx;
+
+       /* default reported when no DF election community is found */
+       *alg = EVPN_MH_DF_ALG_SERVICE_CARVING;
+
+       if (!ecom || !ecom->size)
+               return 0;
+
+       for (idx = 0; idx < ecom->size; idx++) {
+               uint8_t *pnt = ecom->val + (idx * ECOMMUNITY_SIZE);
+
+               /* byte 0 is the type, byte 1 the sub-type */
+               if (pnt[0] != ECOMMUNITY_ENCODE_EVPN
+                   || pnt[1] != ECOMMUNITY_EVPN_SUBTYPE_DF_ELECTION)
+                       continue;
+
+               /* byte 2 carries the algorithm */
+               *alg = pnt[2] & ECOMMUNITY_EVPN_SUBTYPE_DF_ALG_BITS;
+
+               /* bytes 3-5 are skipped; the preference is in bytes 6-7 */
+               (void)ptr_get_be16(pnt + 6, &df_pref);
+               break;
+       }
+
+       return df_pref;
+}
+
 /*
  * Fetch and return the sequence number from MAC Mobility extended
  * community, if present, else 0.
index 19c028a8262c11109874b988345c79e13968df22..6fdf73fd1e32583fed6aa4883dcace75131c8e5e 100644 (file)
@@ -48,6 +48,7 @@ extern uint8_t bgp_attr_default_gw(struct attr *attr);
 
 extern void bgp_attr_evpn_na_flag(struct attr *attr, uint8_t *router_flag,
                bool *proxy);
+extern uint16_t bgp_attr_df_pref_from_ec(struct attr *attr, uint8_t *alg);
 
 extern bool is_zero_gw_ip(const union gw_addr *gw_ip, afi_t afi);
 
index 353b003c3bddca82e09f1e955ba1685e238a2553..de3757aebb3f6c6ab4d0ca97fcfd5f0cf10d8718 100644 (file)
@@ -1038,6 +1038,27 @@ char *ecommunity_ecom2str(struct ecommunity *ecom, int format, int filter)
                                        (flags &
                                         ECOMMUNITY_EVPN_SUBTYPE_ESI_SA_FLAG) ?
                                        "SA":"AA");
+                       } else if (*pnt
+                                  == ECOMMUNITY_EVPN_SUBTYPE_DF_ELECTION) {
+                               uint8_t alg;
+                               uint16_t pref;
+                               uint16_t bmap;
+
+                               alg = *(pnt + 1);
+                               memcpy(&bmap, pnt + 2, 2);
+                               bmap = ntohs(bmap);
+                               memcpy(&pref, pnt + 5, 2);
+                               pref = ntohs(pref);
+
+                               if (bmap)
+                                       snprintf(
+                                               encbuf, sizeof(encbuf),
+                                               "DF: (alg: %u, bmap: 0x%x pref: %u)",
+                                               alg, bmap, pref);
+                               else
+                                       snprintf(encbuf, sizeof(encbuf),
+                                                "DF: (alg: %u, pref: %u)", alg,
+                                                pref);
                        } else
                                unk_ecom = 1;
                } else if (type == ECOMMUNITY_ENCODE_REDIRECT_IP_NH) {
index fe90efe1f7d1d749b48661ae23b8bfd36a28fe97..e9c52287f1e52e5c4b5880b6a8cf4c18c280caf9 100644 (file)
 #define ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL    0x01
 #define ECOMMUNITY_EVPN_SUBTYPE_ES_IMPORT_RT 0x02
 #define ECOMMUNITY_EVPN_SUBTYPE_ROUTERMAC    0x03
+#define ECOMMUNITY_EVPN_SUBTYPE_DF_ELECTION 0x06
 #define ECOMMUNITY_EVPN_SUBTYPE_DEF_GW       0x0d
 #define ECOMMUNITY_EVPN_SUBTYPE_ND           0x08
 
 #define ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY_FLAG_STICKY 0x01
 
+/* DF alg bits - only lower 5 bits are applicable */
+#define ECOMMUNITY_EVPN_SUBTYPE_DF_ALG_BITS 0x1f
+
 #define ECOMMUNITY_EVPN_SUBTYPE_ND_ROUTER_FLAG   0x01
 #define ECOMMUNITY_EVPN_SUBTYPE_ND_OVERRIDE_FLAG 0x02
 #define ECOMMUNITY_EVPN_SUBTYPE_PROXY_FLAG       0x04
index 8f81278dee01398707d772795cde0194297c6583..2e49e85a77590d09317473062884549556b66146 100644 (file)
@@ -54,7 +54,10 @@ static void bgp_evpn_local_es_down(struct bgp *bgp,
 static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp,
                struct bgp_evpn_es *es);
 static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
-               struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr);
+                                                    struct bgp_evpn_es *es,
+                                                    struct in_addr vtep_ip,
+                                                    bool esr, uint8_t df_alg,
+                                                    uint16_t df_pref);
 static void bgp_evpn_es_vtep_del(struct bgp *bgp,
                struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr);
 static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es);
@@ -111,9 +114,10 @@ static int bgp_evpn_es_route_select_install(struct bgp *bgp,
            && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
            && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) {
                if (bgp_zebra_has_route_changed(old_select)) {
-                       bgp_evpn_es_vtep_add(bgp, es,
-                                       old_select->attr->nexthop,
-                                       true /*esr*/);
+                       bgp_evpn_es_vtep_add(bgp, es, old_select->attr->nexthop,
+                                            true /*esr*/,
+                                            old_select->attr->df_alg,
+                                            old_select->attr->df_pref);
                }
                UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG);
                bgp_zebra_clear_route_change_flags(dest);
@@ -140,8 +144,9 @@ static int bgp_evpn_es_route_select_install(struct bgp *bgp,
 
        if (new_select && new_select->type == ZEBRA_ROUTE_BGP
                        && new_select->sub_type == BGP_ROUTE_IMPORTED) {
-               bgp_evpn_es_vtep_add(bgp, es,
-                               new_select->attr->nexthop, true /*esr */);
+               bgp_evpn_es_vtep_add(bgp, es, new_select->attr->nexthop,
+                                    true /*esr */, new_select->attr->df_alg,
+                                    new_select->attr->df_pref);
        } else {
                if (old_select && old_select->type == ZEBRA_ROUTE_BGP
                                && old_select->sub_type == BGP_ROUTE_IMPORTED)
@@ -508,8 +513,10 @@ static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es,
 
 /*****************************************************************************
  * Ethernet Segment (Type-4) Routes
- * ESRs are used for BUM handling. XXX - BUM support is planned for phase-2 i.e.
- * this code is just a place holder for now
+ * ESRs are used for DF election. Currently service-carving described in
+ * RFC 7432 is NOT supported. Instead preference based DF election is
+ * used by default.
+ * Reference: draft-ietf-bess-evpn-pref-df
  */
 /* Build extended community for EVPN ES (type-4) route */
 static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es,
@@ -517,8 +524,10 @@ static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es,
 {
        struct ecommunity ecom_encap;
        struct ecommunity ecom_es_rt;
+       struct ecommunity ecom_df;
        struct ecommunity_val eval;
        struct ecommunity_val eval_es_rt;
+       struct ecommunity_val eval_df;
        bgp_encap_types tnl_type;
        struct ethaddr mac;
 
@@ -542,6 +551,13 @@ static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es,
        attr->ecommunity =
                ecommunity_merge(attr->ecommunity, &ecom_es_rt);
 
+       /* DF election extended community */
+       memset(&ecom_df, 0, sizeof(ecom_df));
+       encode_df_elect_extcomm(&eval_df, es->df_pref);
+       ecom_df.size = 1;
+       ecom_df.val = (uint8_t *)eval_df.val;
+       attr->ecommunity = ecommunity_merge(attr->ecommunity, &ecom_df);
+
        attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
 }
 
@@ -1142,6 +1158,7 @@ static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp,
 {
        struct bgp_evpn_es *es = es_vtep->es;
        struct stream *s;
+       uint32_t flags = 0;
 
        /* Check socket. */
        if (!zclient || zclient->sock < 0)
@@ -1155,6 +1172,9 @@ static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp,
                return 0;
        }
 
+       if (es_vtep->flags & BGP_EVPNES_VTEP_ESR)
+               flags |= ZAPI_ES_VTEP_FLAG_ESR_RXED;
+
        s = zclient->obuf;
        stream_reset(s);
 
@@ -1163,6 +1183,11 @@ static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp,
                bgp->vrf_id);
        stream_put(s, &es->esi, sizeof(esi_t));
        stream_put_ipv4(s, es_vtep->vtep_ip.s_addr);
+       if (add) {
+               stream_putl(s, flags);
+               stream_putc(s, es_vtep->df_alg);
+               stream_putw(s, es_vtep->df_pref);
+       }
 
        stream_putw_at(s, 0, stream_get_endp(s));
 
@@ -1174,7 +1199,8 @@ static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp,
 }
 
 static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp,
-               struct bgp_evpn_es_vtep *es_vtep)
+                                           struct bgp_evpn_es_vtep *es_vtep,
+                                           bool param_change)
 {
        bool old_active;
        bool new_active;
@@ -1190,25 +1216,30 @@ static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp,
 
        new_active = !!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE);
 
-       if (old_active == new_active)
-               return;
+       if ((old_active != new_active) || (new_active && param_change)) {
 
-       if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
-               zlog_debug("es %s vtep %pI4 %s", es_vtep->es->esi_str,
-                          &es_vtep->vtep_ip,
-                          new_active ? "active" : "inactive");
+               if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+                       zlog_debug("es %s vtep %pI4 %s df %u/%u",
+                                  es_vtep->es->esi_str, &es_vtep->vtep_ip,
+                                  new_active ? "active" : "inactive",
+                                  es_vtep->df_alg, es_vtep->df_pref);
 
-       /* send remote ES to zebra */
-       bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active);
+               /* send remote ES to zebra */
+               bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active);
 
-       /* queue up the es for background consistency checks */
-       bgp_evpn_es_cons_checks_pend_add(es_vtep->es);
+               /* queue up the es for background consistency checks */
+               bgp_evpn_es_cons_checks_pend_add(es_vtep->es);
+       }
 }
 
 static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
-               struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr)
+                                                    struct bgp_evpn_es *es,
+                                                    struct in_addr vtep_ip,
+                                                    bool esr, uint8_t df_alg,
+                                                    uint16_t df_pref)
 {
        struct bgp_evpn_es_vtep *es_vtep;
+       bool param_change = false;
 
        es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip);
 
@@ -1216,15 +1247,23 @@ static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
                es_vtep = bgp_evpn_es_vtep_new(es, vtep_ip);
 
        if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
-               zlog_debug("es %s vtep %pI4 add %s", es_vtep->es->esi_str,
-                          &es_vtep->vtep_ip, esr ? "esr" : "ead");
+               zlog_debug("es %s vtep %pI4 add %s df %u/%u",
+                          es_vtep->es->esi_str, &es_vtep->vtep_ip,
+                          esr ? "esr" : "ead", df_alg, df_pref);
 
-       if (esr)
+       if (esr) {
                SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR);
-       else
+               if ((es_vtep->df_pref != df_pref)
+                   || (es_vtep->df_alg != df_alg)) {
+                       param_change = true;
+                       es_vtep->df_pref = df_pref;
+                       es_vtep->df_alg = df_alg;
+               }
+       } else {
                ++es_vtep->evi_cnt;
+       }
 
-       bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep);
+       bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change);
 
        return es_vtep;
 }
@@ -1232,17 +1271,24 @@ static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp,
 static void bgp_evpn_es_vtep_do_del(struct bgp *bgp,
                struct bgp_evpn_es_vtep *es_vtep, bool esr)
 {
+       bool param_change = false;
+
        if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
                zlog_debug("es %s vtep %pI4 del %s", es_vtep->es->esi_str,
                           &es_vtep->vtep_ip, esr ? "esr" : "ead");
        if (esr) {
                UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR);
+               if (es_vtep->df_pref || es_vtep->df_alg) {
+                       param_change = true;
+                       es_vtep->df_pref = 0;
+                       es_vtep->df_alg = 0;
+               }
        } else {
                if (es_vtep->evi_cnt)
                        --es_vtep->evi_cnt;
        }
 
-       bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep);
+       bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep, param_change);
        bgp_evpn_es_vtep_free(es_vtep);
 }
 
@@ -1424,32 +1470,43 @@ static void bgp_evpn_local_es_down(struct bgp *bgp,
 }
 
 /* Process ES link oper-up by generating ES-EAD and ESR */
-static void bgp_evpn_local_es_up(struct bgp *bgp, struct bgp_evpn_es *es)
+static void bgp_evpn_local_es_up(struct bgp *bgp, struct bgp_evpn_es *es,
+                                bool regen_esr)
 {
        struct prefix_evpn p;
+       bool regen_ead = false;
 
-       if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP))
-               return;
-
-       SET_FLAG(es->flags, BGP_EVPNES_OPER_UP);
+       if (!CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) {
+               if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+                       zlog_debug("local es %s up", es->esi_str);
 
-       if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
-               zlog_debug("local es %s up", es->esi_str);
+               SET_FLAG(es->flags, BGP_EVPNES_OPER_UP);
+               regen_esr = true;
+               regen_ead = true;
+       }
 
-       /* generate ESR */
-       build_evpn_type4_prefix(&p, &es->esi, es->originator_ip);
-       if (bgp_evpn_type4_route_update(bgp, es, &p))
-               flog_err(EC_BGP_EVPN_ROUTE_CREATE,
-                               "%u: Type4 route creation failure for ESI %s",
-                               bgp->vrf_id, es->esi_str);
+       if (regen_esr) {
+               if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+                       zlog_debug("local es %s generate ESR", es->esi_str);
+               /* generate ESR */
+               build_evpn_type4_prefix(&p, &es->esi, es->originator_ip);
+               if (bgp_evpn_type4_route_update(bgp, es, &p))
+                       flog_err(EC_BGP_EVPN_ROUTE_CREATE,
+                                "%u: Type4 route creation failure for ESI %s",
+                                bgp->vrf_id, es->esi_str);
+       }
 
-       /* generate EAD-EVI */
-       bgp_evpn_local_type1_evi_route_add(bgp, es);
+       if (regen_ead) {
+               if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
+                       zlog_debug("local es %s generate EAD", es->esi_str);
+               /* generate EAD-EVI */
+               bgp_evpn_local_type1_evi_route_add(bgp, es);
 
-       /* generate EAD-ES */
-       build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG,
-                       &es->esi, es->originator_ip);
-       bgp_evpn_type1_route_update(bgp, es, NULL, &p);
+               /* generate EAD-ES */
+               build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, &es->esi,
+                                       es->originator_ip);
+               bgp_evpn_type1_route_update(bgp, es, NULL, &p);
+       }
 }
 
 static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es)
@@ -1507,11 +1564,13 @@ int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi)
  * ES.
  */
 int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
-               struct in_addr originator_ip, bool oper_up)
+                         struct in_addr originator_ip, bool oper_up,
+                         uint16_t df_pref)
 {
        char buf[ESI_STR_LEN];
        struct bgp_evpn_es *es;
        bool new_es = true;
+       bool regen_esr = false;
 
        /* create the new es */
        es = bgp_evpn_es_find(esi);
@@ -1529,10 +1588,14 @@ int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
        }
 
        if (BGP_DEBUG(evpn_mh, EVPN_MH_ES))
-               zlog_debug("add local es %s orig-ip %pI4", es->esi_str,
-                          &originator_ip);
+               zlog_debug("add local es %s orig-ip %pI4 df_pref %u", es->esi_str,
+                          &originator_ip, df_pref);
 
        es->originator_ip = originator_ip;
+       if (df_pref != es->df_pref) {
+               es->df_pref = df_pref;
+               regen_esr = true;
+       }
        bgp_evpn_es_local_info_set(bgp, es);
 
        /* import all remote Type-4 routes in the ES table */
@@ -1551,7 +1614,7 @@ int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
         * can be generated even if the link is inactive.
         */
        if (oper_up)
-               bgp_evpn_local_es_up(bgp, es);
+               bgp_evpn_local_es_up(bgp, es, regen_esr);
        else
                bgp_evpn_local_es_down(bgp, es);
 
@@ -1621,12 +1684,49 @@ static void bgp_evpn_es_json_vtep_fill(json_object *json_vteps,
                if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE)
                        json_array_string_add(json_flags, "active");
                json_object_object_add(json_vtep_entry, "flags", json_flags);
+               /* DF election parameters are only known once an ESR has
+                * been received from this VTEP
+                */
+               if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) {
+                       json_object_int_add(json_vtep_entry, "dfPreference",
+                                           es_vtep->df_pref);
+                       /* report the algorithm, not the preference again */
+                       json_object_int_add(json_vtep_entry, "dfAlgorithm",
+                                           es_vtep->df_alg);
+               }
        }
 
        json_object_array_add(json_vteps,
                        json_vtep_entry);
 }
 
+/*
+ * Print one line per entry in es->es_vtep_list with the VTEP IP and its
+ * flags: "E" when BGP_EVPNES_VTEP_ESR is set, "A" when
+ * BGP_EVPNES_VTEP_ACTIVE is set, "-" when neither is. The DF algorithm
+ * and preference are appended only when the ESR flag is set.
+ */
+static void bgp_evpn_es_vteps_show_detail(struct vty *vty,
+                                         struct bgp_evpn_es *es)
+{
+       char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ];
+       struct listnode *node;
+       struct bgp_evpn_es_vtep *es_vtep;
+       char alg_buf[EVPN_DF_ALG_STR_LEN];
+
+       for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) {
+               bool esr = !!(es_vtep->flags & BGP_EVPNES_VTEP_ESR);
+
+               vtep_flag_str[0] = '\0';
+               if (esr)
+                       strlcat(vtep_flag_str, "E", sizeof(vtep_flag_str));
+               if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE)
+                       strlcat(vtep_flag_str, "A", sizeof(vtep_flag_str));
+
+               if (vtep_flag_str[0] == '\0')
+                       strlcat(vtep_flag_str, "-", sizeof(vtep_flag_str));
+
+               /* %pI4 is used instead of inet_ntoa(), which hands back a
+                * shared static buffer
+                */
+               vty_out(vty, "  %pI4 flags: %s", &es_vtep->vtep_ip,
+                       vtep_flag_str);
+
+               if (esr)
+                       vty_out(vty, " df_alg: %s df_pref: %u\n",
+                               evpn_es_df_alg2str(es_vtep->df_alg, alg_buf,
+                                                  sizeof(alg_buf)),
+                               es_vtep->df_pref);
+               else
+                       vty_out(vty, "\n");
+       }
+}
+
 static void bgp_evpn_es_show_entry(struct vty *vty,
                struct bgp_evpn_es *es, json_object *json)
 {
@@ -1695,6 +1795,9 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
        if (json) {
                json_object *json_flags;
                json_object *json_incons;
+               json_object *json_vteps;
+               struct listnode *node;
+               struct bgp_evpn_es_vtep *es_vtep;
 
                /* Add the "brief" info first */
                bgp_evpn_es_show_entry(vty, es, json);
@@ -1715,6 +1818,14 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                                es->remote_es_evi_cnt);
                json_object_int_add(json, "inconsistentVniVtepCount",
                                es->incons_evi_vtep_cnt);
+               if (listcount(es->es_vtep_list)) {
+                       json_vteps = json_object_new_array();
+                       for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node,
+                                                 es_vtep)) {
+                               bgp_evpn_es_json_vtep_fill(json_vteps, es_vtep);
+                       }
+                       json_object_object_add(json, "vteps", json_vteps);
+               }
                if (es->inconsistencies) {
                        json_incons = json_object_new_array();
                        if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST)
@@ -1726,7 +1837,6 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
        } else {
                char incons_str[BGP_EVPNES_INCONS_STR_SZ];
                char type_str[4];
-               char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ];
                char buf1[RD_ADDRSTRLEN];
 
                type_str[0] = '\0';
@@ -1735,10 +1845,6 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                if (es->flags & BGP_EVPNES_REMOTE)
                        strlcat(type_str, "R", sizeof(type_str));
 
-               bgp_evpn_es_vteps_str(vtep_str, es, sizeof(vtep_str));
-               if (!strlen(vtep_str))
-                       strlcpy(buf1, "-", sizeof(buf1));
-
                if (es->flags & BGP_EVPNES_LOCAL)
                        prefix_rd2str(&es->prd, buf1, sizeof(buf1));
                else
@@ -1748,6 +1854,9 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                vty_out(vty, " Type: %s\n", type_str);
                vty_out(vty, " RD: %s\n", buf1);
                vty_out(vty, " Originator-IP: %pI4\n", &es->originator_ip);
+               if (es->flags & BGP_EVPNES_LOCAL)
+                       vty_out(vty, " Local ES DF preference: %u\n",
+                               es->df_pref);
                vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list));
                vty_out(vty, " Remote VNI Count: %d\n",
                                es->remote_es_evi_cnt);
@@ -1763,7 +1872,10 @@ static void bgp_evpn_es_show_entry_detail(struct vty *vty,
                }
                vty_out(vty, " Inconsistencies: %s\n",
                                incons_str);
-               vty_out(vty, " VTEPs: %s\n", vtep_str);
+               if (listcount(es->es_vtep_list)) {
+                       vty_out(vty, " VTEPs:\n");
+                       bgp_evpn_es_vteps_show_detail(vty, es);
+               }
                vty_out(vty, "\n");
        }
 }
@@ -1936,7 +2048,8 @@ static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp,
                struct bgp_evpn_es_vtep *es_vtep;
 
                es_vtep = bgp_evpn_es_vtep_add(bgp, evi_vtep->es_evi->es,
-                               evi_vtep->vtep_ip, false /*esr*/);
+                                              evi_vtep->vtep_ip, false /*esr*/,
+                                              0, 0);
                evi_vtep->es_vtep = es_vtep;
        } else {
                if (evi_vtep->es_vtep) {
index 93355d495a8ee8aa96bab951499d087b44ec1b22..d719524bddbc31e88fc8f85a954f24a7ae25ce3f 100644 (file)
@@ -110,6 +110,9 @@ struct bgp_evpn_es {
         */
        uint32_t incons_evi_vtep_cnt;
 
+       /* preference config for BUM-DF election. advertised via the ESR. */
+       uint16_t df_pref;
+
        QOBJ_FIELDS
 };
 DECLARE_QOBJ_TYPE(bgp_evpn_es)
@@ -131,6 +134,10 @@ struct bgp_evpn_es_vtep {
 
        uint32_t evi_cnt; /* es_evis referencing this vtep as an active path */
 
+       /* Algorithm and preference for DF election. Rxed via the ESR */
+       uint8_t df_alg;
+       uint16_t df_pref;
+
        /* memory used for adding the entry to es->es_vtep_list */
        struct listnode es_listnode;
 };
@@ -264,6 +271,11 @@ static inline bool bgp_evpn_attr_is_local_es(struct attr *attr)
        return attr ? !!(attr->es_flags & ATTR_ES_IS_LOCAL) : false;
 }
 
+/* Return the DF preference stored on the attribute, or 0 for a NULL attr */
+static inline uint32_t bgp_evpn_attr_get_df_pref(struct attr *attr)
+{
+       if (!attr)
+               return 0;
+
+       return attr->df_pref;
+}
+
 /****************************************************************************/
 extern int bgp_evpn_es_route_install_uninstall(struct bgp *bgp,
                struct bgp_evpn_es *es, afi_t afi, safi_t safi,
@@ -276,7 +288,8 @@ int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi,
                struct attr *attr, uint8_t *pfx, int psize,
                uint32_t addpath_id);
 extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi,
-               struct in_addr originator_ip, bool oper_up);
+                                struct in_addr originator_ip, bool oper_up,
+                                uint16_t df_pref);
 extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi);
 extern int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni);
 extern int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni);
index 611566201eedf4d306fff85ab9838274a91deac8..c47576c00c5a768943f1583a73a17241f936b1e1 100644 (file)
@@ -308,6 +308,17 @@ static inline void encode_es_rt_extcomm(struct ecommunity_val *eval,
        memcpy(&eval->val[2], mac, ETH_ALEN);
 }
 
+/*
+ * Build the DF election extended community in *eval.
+ *
+ * Bytes written:
+ *   val[0]    ECOMMUNITY_ENCODE_EVPN
+ *   val[1]    ECOMMUNITY_EVPN_SUBTYPE_DF_ELECTION
+ *   val[2]    EVPN_MH_DF_ALG_PREF
+ *   val[6..7] pref, most significant byte first
+ * Every other byte of *eval is left zeroed.
+ */
+static inline void encode_df_elect_extcomm(struct ecommunity_val *eval,
+                                          uint16_t pref)
+{
+       uint8_t pref_hi = (uint8_t)(pref >> 8);
+       uint8_t pref_lo = (uint8_t)pref;
+
+       memset(eval, 0, sizeof(*eval));
+
+       eval->val[0] = ECOMMUNITY_ENCODE_EVPN;
+       eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_DF_ELECTION;
+       eval->val[2] = EVPN_MH_DF_ALG_PREF;
+
+       eval->val[6] = pref_hi;
+       eval->val[7] = pref_lo;
+}
+
 static inline void encode_esi_label_extcomm(struct ecommunity_val *eval,
                                        bool single_active)
 {
index c22bfefb630387bc7afa4e055e33c3bb310da5c5..b2491e1187cb4c15a13da54bf6f50a10b2b90810 100644 (file)
@@ -23,6 +23,7 @@
 #include "prefix.h"
 #include "lib/json.h"
 #include "lib/printfrr.h"
+#include "lib/vxlan.h"
 #include "stream.h"
 
 #include "bgpd/bgpd.h"
@@ -4629,7 +4630,8 @@ DEFPY_HIDDEN(test_es_add,
                        oper_up = false;
                vtep_ip = bgp->router_id;
 
-               ret = bgp_evpn_local_es_add(bgp, &esi, vtep_ip, oper_up);
+               ret = bgp_evpn_local_es_add(bgp, &esi, vtep_ip, oper_up,
+                                           EVPN_MH_DF_PREF_MIN);
                if (ret == -1) {
                        vty_out(vty, "%%Failed to add ES\n");
                        return CMD_WARNING;
index d4a69af4f7a3d4b249a4c0b982ebb7cad612c521..00213b4239431b703390796dfad274db40752195 100644 (file)
@@ -2545,6 +2545,7 @@ static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS)
        char buf[ESI_STR_LEN];
        struct in_addr originator_ip;
        uint8_t active;
+       uint16_t df_pref;
 
        bgp = bgp_lookup_by_vrf_id(vrf_id);
        if (!bgp)
@@ -2554,13 +2555,15 @@ static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS)
        stream_get(&esi, s, sizeof(esi_t));
        originator_ip.s_addr = stream_get_ipv4(s);
        active = stream_getc(s);
+       df_pref = stream_getw(s);
 
        if (BGP_DEBUG(zebra, ZEBRA))
-               zlog_debug("Rx add ESI %s originator-ip %pI4 active %u",
-                          esi_to_str(&esi, buf, sizeof(buf)), &originator_ip,
-                          active);
+               zlog_debug(
+                       "Rx add ESI %s originator-ip %pI4 active %u df_pref %u",
+                       esi_to_str(&esi, buf, sizeof(buf)),
+                       &originator_ip, active, df_pref);
 
-       bgp_evpn_local_es_add(bgp, &esi, originator_ip, active);
+       bgp_evpn_local_es_add(bgp, &esi, originator_ip, active, df_pref);
 
        return 0;
 }
index 24def1bac4dc41894a67a9933f6840435106a8a4..663a87afde26c1c7cb2e64de6477cb24f94fa662 100644 (file)
@@ -30,6 +30,7 @@
 #include "jhash.h"
 #include "lib_errors.h"
 #include "printfrr.h"
+#include "vxlan.h"
 
 DEFINE_MTYPE_STATIC(LIB, PREFIX, "Prefix")
 DEFINE_MTYPE_STATIC(LIB, PREFIX_FLOWSPEC, "Prefix Flowspec")
@@ -1336,6 +1337,29 @@ char *esi_to_str(const esi_t *esi, char *buf, int size)
        return ptr;
 }
 
+char *evpn_es_df_alg2str(uint8_t df_alg, char *buf, int buf_len)
+{ /* Format the name of df_alg into buf (size buf_len) and return buf. */
+       switch (df_alg) {
+       case EVPN_MH_DF_ALG_SERVICE_CARVING:
+               snprintf(buf, buf_len, "service-carving");
+               break;
+
+       case EVPN_MH_DF_ALG_HRW:
+               snprintf(buf, buf_len, "HRW");
+               break;
+
+       case EVPN_MH_DF_ALG_PREF:
+               snprintf(buf, buf_len, "preference");
+               break;
+
+       default: /* no case above matched: print the raw number instead */
+               snprintf(buf, buf_len, "unknown %u", df_alg);
+               break;
+       }
+
+       return buf; /* same pointer the caller passed in */
+}
+
 printfrr_ext_autoreg_p("EA", printfrr_ea)
 static ssize_t printfrr_ea(char *buf, size_t bsz, const char *fmt,
                           int prec, const void *ptr)
index 471978ed28e61e0303ca33240c9397a2381481ee..d2cabf3104dd712f2f04c6476cdedb322d85d9ea 100644 (file)
@@ -62,6 +62,7 @@ typedef enum {
 #define EVPN_ETH_TAG_BYTES 4
 #define ESI_BYTES 10
 #define ESI_STR_LEN (3 * ESI_BYTES)
+#define EVPN_DF_ALG_STR_LEN 24
 
 /* Maximum number of VTEPs per-ES -
  * XXX - temporary limit for allocating strings etc.
@@ -515,6 +516,7 @@ extern unsigned prefix_hash_key(const void *pp);
 
 extern int str_to_esi(const char *str, esi_t *esi);
 extern char *esi_to_str(const esi_t *esi, char *buf, int size);
+extern char *evpn_es_df_alg2str(uint8_t df_alg, char *buf, int buf_len);
 extern void prefix_evpn_hexdump(const struct prefix_evpn *p);
 
 static inline int ipv6_martian(struct in6_addr *addr)
index 23f85d809b5e787df0bc01ba4627e036fb4c2585..4f75f121ca25a6ad3034f51fdd800686933e256e 100644 (file)
@@ -386,6 +386,16 @@ static inline const uint8_t *ptr_get_be32(const uint8_t *ptr, uint32_t *out)
        return ptr + 4;
 }
 
+static inline uint8_t *ptr_get_be16(uint8_t *ptr, uint16_t *out)
+{ /* Read a 16-bit network-order value at ptr into *out; return ptr + 2. */
+       uint16_t tmp;
+
+       memcpy(&tmp, ptr, sizeof(tmp)); /* copy the two raw bytes */
+       *out = ntohs(tmp); /* network -> host byte order */
+
+       return ptr + 2; /* NOTE(review): non-const, unlike ptr_get_be32 */
+}
+
 /*
  * so Normal stream_getX functions assert.  Which is anathema
  * to keeping a daemon up and running when something goes south
index 69d3939596bb5ac05720844a8d806b5dc6c6bd6d..62963a6097d4c677668cc5a53ae2433fcbb5cf6c 100644 (file)
 extern "C" {
 #endif
 
+/* EVPN MH DF election algorithms */
+#define EVPN_MH_DF_ALG_SERVICE_CARVING 0
+#define EVPN_MH_DF_ALG_HRW 1
+#define EVPN_MH_DF_ALG_PREF 2
+
+/* preference range for DF election (every value fits in a uint16_t) */
+#define EVPN_MH_DF_PREF_MIN 0
+#define EVPN_MH_DF_PREF_DEFAULT 32767
+#define EVPN_MH_DF_PREF_MAX 65535
+
 /* VxLAN Network Identifier - 24-bit (RFC 7348) */
 typedef uint32_t vni_t;
 #define VNI_MAX 16777215 /* (2^24 - 1) */
index 959a101395842644d805541357ad626760b7063b..80dca3fc56072fd881e393092224badd23b216a1 100644 (file)
@@ -690,6 +690,12 @@ zapi_rule_notify_owner2str(enum zapi_rule_notify_owner note)
  * to allocate past 0x80
  */
 
+/* Zebra ES VTEP flags (ZEBRA_REMOTE_ES_VTEP_ADD) */
+/* ESR (EVPN Type-4 route) has been received from the VTEP. Only VTEPs that
+ * have advertised the Type-4 route can participate in DF election.
+ */
+#define ZAPI_ES_VTEP_FLAG_ESR_RXED (1 << 0)
+
 enum zebra_neigh_state { ZEBRA_NEIGH_INACTIVE = 0, ZEBRA_NEIGH_ACTIVE = 1 };
 
 struct zclient_options {