From c44ab6f1f3016fe1d595067dfa95b917e25519e9 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Fri, 27 Mar 2020 14:43:50 -0700 Subject: [PATCH] bgpd: support for Ethernet Segments and Type-1/EAD routes This is the base patch that brings in support for Type-1 routes. It includes support for - - Ethernet Segment (ES) management - EAD route handling - MAC-IP (Type-2) routes with a non-zero ESI i.e. Aliasing for active-active multihoming - Initial infra for consistency checking. Consistency checking is a fundamental feature for active-active solutions like MLAG. We will try to leverage the info in the EAD-ES/EAD-EVI routes to detect inconsistencies in access config across VTEPs attached to the same Ethernet Segment. Functionality Overview - ======================== 1. Ethernet segments are created in zebra and associated with access VLANs. zebra sends that info as ES and ES-EVI objects to BGP. 2. BGP advertises EAD-ES and EAD-EVI routes for the locally attached ethernet segments. 3. Similarly BGP processes EAD-ES and EAD-EVI routes from peers and translates them into ES-VTEP objects which are then sent to zebra as remote ESs. 4. Each ES in zebra is associated with a list of active VTEPs which is then translated into a L2-NHG (nexthop group). This is the ES "Alias" entry. 5. MAC-IP routes with a non-zero ESI use the alias entry created in (4.) to forward traffic i.e. a MAC-ECMP is done to these remote-ES destinations. EAD route management (route table and key) - ============================================ 1. Local EAD-ES routes a. route-table: per-ES route-table key: {RD=ES-RD, ESI, ET=0xffffffff, VTEP-IP) b. route-table: per-VNI route-table Not added c. route-table: global route-table key: {RD=ES-RD, ESI, ET=0xffffffff) 2. Remote EAD-ES routes a. route-table: per-ES route-table Not added b. route-table: per-VNI route-table key: {RD=ES-RD, ESI, ET=0xffffffff, VTEP-IP) c. route-table: global route-table key: {RD=ES-RD, ESI, ET=0xffffffff) 3. Local EAD-EVI routes a. 
route-table: per-ES route-table Not added b. route-table: per-VNI route-table key: {RD=0, ESI, ET=0, VTEP-IP) c. route-table: global route-table key: {RD=L2-VNI-RD, ESI, ET=0) 4. Remote EAD-EVI routes a. route-table: per-ES route-table Not added b. route-table: per-VNI route-table key: {RD=0, ESI, ET=0, VTEP-IP) c. route-table: global route-table key: {RD=L2-VNI-RD, ESI, ET=0) Please refer to bgp_evpn_mh.h for info on how the data-structures are organized. Signed-off-by: Anuradha Karuppiah --- bgpd/bgp_evpn.c | 228 ++- bgpd/bgp_evpn.h | 2 +- bgpd/bgp_evpn_mh.c | 3125 +++++++++++++++++++++++++++++++-------- bgpd/bgp_evpn_mh.h | 244 ++- bgpd/bgp_evpn_private.h | 125 +- bgpd/bgp_rd.h | 1 + bgpd/bgp_route.h | 1 + bgpd/bgp_zebra.c | 90 +- bgpd/bgpd.c | 3 + bgpd/bgpd.h | 6 +- lib/prefix.h | 1 + 11 files changed, 3098 insertions(+), 728 deletions(-) diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 605a2c058..e4252cace 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -55,10 +55,8 @@ /* * Definitions and external declarations. 
*/ -extern struct zclient *zclient; - DEFINE_QOBJ_TYPE(bgpevpn) -DEFINE_QOBJ_TYPE(evpnes) +DEFINE_QOBJ_TYPE(bgp_evpn_es) /* @@ -94,7 +92,7 @@ static bool vni_hash_cmp(const void *p1, const void *p2) return (vpn1->vni == vpn2->vni); } -static int vni_list_cmp(void *p1, void *p2) +int vni_list_cmp(void *p1, void *p2) { const struct bgpevpn *vpn1 = p1; const struct bgpevpn *vpn2 = p2; @@ -540,19 +538,54 @@ static void evpn_convert_nexthop_to_ipv6(struct attr *attr) attr->mp_nexthop_len = IPV6_MAX_BYTELEN; } +struct bgp_node *bgp_global_evpn_node_get( + struct bgp_table *table, afi_t afi, + safi_t safi, const struct prefix_evpn *evp, + struct prefix_rd *prd) +{ + struct prefix_evpn global_p; + + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) { + /* prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy of the prefix + */ + evpn_type1_prefix_global_copy(&global_p, evp); + evp = &global_p; + } + return bgp_afi_node_get(table, afi, safi, (struct prefix *)evp, prd); +} + +struct bgp_node *bgp_global_evpn_node_lookup( + struct bgp_table *table, afi_t afi, + safi_t safi, const struct prefix_evpn *evp, + struct prefix_rd *prd) +{ + struct prefix_evpn global_p; + + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) { + /* prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy of the prefix + */ + evpn_type1_prefix_global_copy(&global_p, evp); + evp = &global_p; + } + return bgp_afi_node_lookup(table, afi, safi, (struct prefix *)evp, prd); +} + /* * Add (update) or delete MACIP from zebra. */ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, const struct prefix_evpn *p, struct in_addr remote_vtep_ip, int add, - uint8_t flags, uint32_t seq) + uint8_t flags, uint32_t seq, esi_t *esi) { struct stream *s; int ipa_len; char buf1[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; char buf3[INET6_ADDRSTRLEN]; + static struct in_addr zero_remote_vtep_ip; /* Check socket. 
*/ if (!zclient || zclient->sock < 0) @@ -566,6 +599,9 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, __func__); return 0; } + + if (!esi) + esi = zero_esi; s = zclient->obuf; stream_reset(s); @@ -583,13 +619,20 @@ static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn, stream_putl(s, ipa_len); stream_put(s, &p->prefix.macip_addr.ip.ip.addr, ipa_len); } - stream_put_in_addr(s, &remote_vtep_ip); + /* zero out the VTEP-IP if the ESI is non-zero to avoid incorrect + * mods; a zero ESI (or a delete, which passes no ESI) keeps the real VTEP-IP + */ + if (memcmp(zero_esi, esi, sizeof(esi_t))) + stream_put_in_addr(s, &zero_remote_vtep_ip); + else + stream_put_in_addr(s, &remote_vtep_ip); /* TX flags - MAC sticky status and/or gateway mac */ /* Also TX the sequence number of the best route. */ if (add) { stream_putc(s, flags); stream_putl(s, seq); + stream_put(s, esi, sizeof(esi_t)); } stream_putw_at(s, 0, stream_get_endp(s)); @@ -873,7 +916,10 @@ static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn, SET_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG); ret = bgp_zebra_send_remote_macip( bgp, vpn, p, pi->attr->nexthop, 1, flags, - mac_mobility_seqnum(pi->attr)); + mac_mobility_seqnum(pi->attr), + bgp_evpn_attr_get_esi(pi->attr)); + } else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) { + ret = bgp_evpn_remote_es_evi_add(bgp, vpn, p); } else { switch (pi->attr->pmsi_tnl_type) { case PMSI_TNLTYPE_INGR_REPL: @@ -903,7 +949,9 @@ static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn, if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) ret = bgp_zebra_send_remote_macip(bgp, vpn, p, remote_vtep_ip, - 0, 0, 0); + 0, 0, 0, NULL); + else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) + ret = bgp_evpn_remote_es_evi_del(bgp, vpn, p); else ret = bgp_zebra_send_remote_vtep(bgp, vpn, p, VXLAN_FLOOD_DISABLED, 0); @@ -929,8 +977,9 @@ static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn, * this table is a 2-level tree (RD-level + Prefix-level) similar to * 
L3VPN routes. */ - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - bgp_dest_get_prefix(dest), &vpn->prd); + global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)bgp_dest_get_prefix(dest), + &vpn->prd); if (global_dest) { /* Delete route entry in the global EVPN table. */ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); @@ -950,8 +999,9 @@ static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn, /* * Calculate the best path for an EVPN route. Install/update best path in zebra, * if appropriate. + * Note: vpn is NULL for local EAD-ES routes. */ -static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, +int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, struct bgp_dest *dest) { struct bgp_path_info *old_select, *new_select; @@ -1023,7 +1073,8 @@ static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, */ if (old_select && old_select->peer == bgp->peer_self && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_STATIC) + && old_select->sub_type == BGP_ROUTE_STATIC + && vpn) evpn_delete_old_local_route(bgp, vpn, dest, old_select); } else { if (old_select && old_select->type == ZEBRA_ROUTE_BGP @@ -1235,8 +1286,9 @@ static int update_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp, build_evpn_type5_route_extcomm(bgp_vrf, &attr); /* get the route node in global table */ - dest = bgp_afi_node_get(bgp_evpn->rib[afi][safi], afi, safi, - (struct prefix *)evp, &bgp_vrf->vrf_prd); + dest = bgp_global_evpn_node_get(bgp_evpn->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)evp, + &bgp_vrf->vrf_prd); assert(dest); /* create or update the route entry within the route node */ @@ -1450,7 +1502,7 @@ static void evpn_cleanup_local_non_best_route(struct bgp *bgp, */ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, struct prefix_evpn *p, uint8_t flags, - uint32_t seq) + 
uint32_t seq, esi_t *esi) { struct bgp_dest *dest; struct attr attr; @@ -1472,7 +1524,8 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, attr.default_gw = CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_GW) ? 1 : 0; attr.router_flag = CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_ROUTER_FLAG) ? 1 : 0; - + if (esi) + memcpy(&attr.esi, esi, sizeof(esi_t)); /* PMSI is only needed for type-3 routes */ if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) { attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_PMSI_TUNNEL); @@ -1562,8 +1615,9 @@ static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, if (route_change) { struct bgp_path_info *global_pi; - dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &vpn->prd); + dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)p, + &vpn->prd); update_evpn_route_entry(bgp, vpn, afi, safi, dest, attr_new, 1, &global_pi, flags, seq); @@ -1619,8 +1673,8 @@ static int delete_evpn_type5_route(struct bgp *bgp_vrf, struct prefix_evpn *evp) return 0; /* locate the global route entry for this type-5 prefix */ - dest = bgp_afi_node_lookup(bgp_evpn->rib[afi][safi], afi, safi, - (struct prefix *)evp, &bgp_vrf->vrf_prd); + dest = bgp_global_evpn_node_lookup(bgp_evpn->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)evp, &bgp_vrf->vrf_prd); if (!dest) return 0; @@ -1656,8 +1710,8 @@ static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn, * this table is a 2-level tree (RD-level + Prefix-level) similar to * L3VPN routes. */ - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &vpn->prd); + global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)p, &vpn->prd); if (global_dest) { /* Delete route entry in the global EVPN table. 
*/ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); @@ -1800,9 +1854,9 @@ static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn) bgp_path_info_unlock(pi); /* Update route in global routing table. */ - rd_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, - safi, (struct prefix *)evp, - &vpn->prd); + rd_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, + safi, (const struct prefix_evpn *)evp, + &vpn->prd); assert(rd_dest); update_evpn_route_entry(bgp, vpn, afi, safi, rd_dest, attr_new, 0, &global_pi, 0, @@ -1958,7 +2012,7 @@ int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) if (bgp_evpn_vni_flood_mode_get(bgp, vpn) == VXLAN_FLOOD_HEAD_END_REPL) { build_evpn_type3_prefix(&p, vpn->originator_ip); - ret = update_evpn_route(bgp, vpn, &p, 0, 0); + ret = update_evpn_route(bgp, vpn, &p, 0, 0, NULL); if (ret) return ret; } @@ -2202,6 +2256,14 @@ static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, struct bgp_path_info *pi; struct attr *attr_new; int ret; + struct prefix_evpn ad_evp; + + /* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ + if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) + p = evpn_type1_prefix_vni_copy(&ad_evp, p, + parent_pi->attr->nexthop); /* Create (or fetch) route within the VNI. */ /* NOTE: There is no RD here. */ @@ -2336,6 +2398,14 @@ static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn, struct bgp_dest *dest; struct bgp_path_info *pi; int ret; + struct prefix_evpn ad_evp; + + /* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ + if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) + p = evpn_type1_prefix_vni_copy(&ad_evp, p, + parent_pi->attr->nexthop); /* Locate route within the VNI. */ /* NOTE: There is no RD here. 
*/ @@ -2727,6 +2797,11 @@ static int install_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) if (ret) return ret; + ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE, + 1); + if (ret) + return ret; + return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE, 1); } @@ -2755,6 +2830,12 @@ static int uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn) if (ret) return ret; + ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_AD_ROUTE, + 0); + if (ret) + return ret; + + return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE, 0); } @@ -2853,6 +2934,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, struct attr *attr = pi->attr; struct ecommunity *ecom; int i; + struct prefix_evpn ad_evp; assert(attr); @@ -2860,6 +2942,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE || evp->prefix.route_type == BGP_EVPN_IMET_ROUTE || evp->prefix.route_type == BGP_EVPN_ES_ROUTE + || evp->prefix.route_type == BGP_EVPN_AD_ROUTE || evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE)) return 0; @@ -2867,6 +2950,12 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, if (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES))) return 0; + /* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) + evp = evpn_type1_prefix_vni_copy(&ad_evp, evp, attr->nexthop); + ecom = attr->ecommunity; if (!ecom || !ecom->size) return -1; @@ -2880,7 +2969,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, struct ecommunity_val eval_tmp; struct irt_node *irt; /* import rt for l2vni */ struct vrf_irt_node *vrf_irt; /* import rt for l3vni */ - struct evpnes *es; + struct bgp_evpn_es *es; /* Only deal with RTs */ pnt = (ecom->val + (i * 
ECOMMUNITY_SIZE)); @@ -2898,6 +2987,7 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, */ if (evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE || evp->prefix.route_type == BGP_EVPN_IMET_ROUTE || + evp->prefix.route_type == BGP_EVPN_AD_ROUTE || evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE) { irt = lookup_import_rt(bgp, eval); @@ -2945,9 +3035,9 @@ static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi, /* we will match based on the entire esi to avoid * imoort of an es route for esi2 into esi1 */ - es = bgp_evpn_lookup_es(bgp, &evp->prefix.es_addr.esi); + es = bgp_evpn_es_find(&evp->prefix.es_addr.esi); if (es && is_es_local(es)) - install_uninstall_route_in_es( + bgp_evpn_es_route_install_uninstall( bgp, es, afi, safi, evp, pi, import); } } @@ -3081,8 +3171,8 @@ static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) return 0; attr = pi->attr; - global_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)&p, &vpn->prd); + global_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)&p, &vpn->prd); update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr, 1, &pi, 0, mac_mobility_seqnum(attr)); @@ -3115,8 +3205,8 @@ static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) * attribute. */ attr = pi->attr; - global_dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)evp, &vpn->prd); + global_dest = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)evp, &vpn->prd); assert(global_dest); update_evpn_route_entry(bgp, vpn, afi, safi, global_dest, attr, 1, &global_pi, 0, @@ -3152,8 +3242,8 @@ static int delete_withdraw_vni_routes(struct bgp *bgp, struct bgpevpn *vpn) /* Remove type-3 route for this VNI from global table. 
*/ build_evpn_type3_prefix(&p, vpn->originator_ip); - global_dest = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - (struct prefix *)&p, &vpn->prd); + global_dest = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)&p, &vpn->prd); if (global_dest) { /* Delete route entry in the global EVPN table. */ delete_evpn_route_entry(bgp, afi, safi, global_dest, &pi); @@ -3220,7 +3310,7 @@ static void create_advertise_type3(struct hash_bucket *bucket, void *data) return; build_evpn_type3_prefix(&p, vpn->originator_ip); - if (update_evpn_route(bgp, vpn, &p, 0, 0)) + if (update_evpn_route(bgp, vpn, &p, 0, 0, NULL)) flog_err(EC_BGP_EVPN_ROUTE_CREATE, "Type3 route creation failure for VNI %u", vpn->vni); } @@ -3288,8 +3378,9 @@ static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi, p.prefix.route_type = BGP_EVPN_MAC_IP_ROUTE; /* Copy Ethernet Seg Identifier */ - memcpy(&evpn.eth_s_id.val, pfx, ESI_LEN); - pfx += ESI_LEN; + if (attr) + memcpy(&attr->esi, pfx, sizeof(esi_t)); + pfx += sizeof(esi_t); /* Copy Ethernet Tag */ memcpy(&eth_tag, pfx, 4); @@ -3486,8 +3577,9 @@ static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi, memset(&evpn, 0, sizeof(evpn)); /* Fetch ESI */ - memcpy(&evpn.eth_s_id.val, pfx, 10); - pfx += 10; + if (attr) + memcpy(&attr->esi, pfx, sizeof(esi_t)); + pfx += ESI_BYTES; /* Fetch Ethernet Tag. 
*/ memcpy(&eth_tag, pfx, 4); @@ -3583,9 +3675,9 @@ static void evpn_mpattr_encode_type5(struct stream *s, const struct prefix *p, stream_putc(s, 8 + 10 + 4 + 1 + len + 3); stream_put(s, prd->val, 8); if (attr) - stream_put(s, &(attr->evpn_overlay.eth_s_id), 10); + stream_put(s, &attr->esi, sizeof(esi_t)); else - stream_put(s, &temp, 10); + stream_put(s, 0, sizeof(esi_t)); stream_putl(s, p_evpn_p->prefix_addr.eth_tag); stream_putc(s, p_evpn_p->prefix_addr.ip_prefix_length); if (IS_IPADDR_V4(&p_evpn_p->prefix_addr.ip)) @@ -4288,6 +4380,15 @@ char *bgp_evpn_route2str(const struct prefix_evpn *p, char *buf, int len) is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN : IPV6_MAX_BITLEN, inet_ntoa(p->prefix.es_addr.ip.ipaddr_v4)); + } else if (p->prefix.route_type == BGP_EVPN_AD_ROUTE) { + snprintf(buf, len, "[%d]:[%u]:[%s]:[%d]:[%s]", + p->prefix.route_type, + p->prefix.ead_addr.eth_tag, + esi_to_str(&p->prefix.ead_addr.esi, + buf3, sizeof(buf3)), + is_evpn_prefix_ipaddr_v4(p) ? IPV4_MAX_BITLEN + : IPV6_MAX_BITLEN, + inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4)); } else { /* For EVPN route types not supported yet. 
*/ snprintf(buf, len, "(unsupported route type %d)", @@ -4327,7 +4428,7 @@ void bgp_evpn_encode_prefix(struct stream *s, const struct prefix *p, stream_putc(s, len); stream_put(s, prd->val, 8); /* RD */ if (attr) - stream_put(s, &attr->evpn_overlay.eth_s_id, ESI_LEN); + stream_put(s, &attr->esi, ESI_BYTES); else stream_put(s, 0, 10); stream_putl(s, evp->prefix.macip_addr.eth_tag); /* Ethernet Tag ID */ @@ -4362,6 +4463,16 @@ void bgp_evpn_encode_prefix(struct stream *s, const struct prefix *p, stream_put_in_addr(s, &evp->prefix.es_addr.ip.ipaddr_v4); break; + case BGP_EVPN_AD_ROUTE: + /* RD, ESI, EthTag, 1 VNI */ + len = RD_BYTES + ESI_BYTES + EVPN_ETH_TAG_BYTES + BGP_LABEL_BYTES; + stream_putc(s, len); + stream_put(s, prd->val, RD_BYTES); /* RD */ + stream_put(s, evp->prefix.ead_addr.esi.val, ESI_BYTES); /* ESI */ + stream_putl(s, evp->prefix.ead_addr.eth_tag); /* Ethernet Tag */ + stream_put(s, label, BGP_LABEL_BYTES); + break; + case BGP_EVPN_IP_PREFIX_ROUTE: /* TODO: AddPath support. */ evpn_mpattr_encode_type5(s, p, prd, label, num_labels, attr); @@ -4449,7 +4560,7 @@ int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, break; case BGP_EVPN_ES_ROUTE: - if (process_type4_route(peer, afi, safi, + if (bgp_evpn_type4_route_process(peer, afi, safi, withdraw ? NULL : attr, pnt, psize, addpath_id)) { flog_err( @@ -4460,6 +4571,18 @@ int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr, } break; + case BGP_EVPN_AD_ROUTE: + if (bgp_evpn_type1_route_process(peer, afi, safi, + withdraw ? NULL : attr, pnt, + psize, addpath_id)) { + flog_err( + EC_BGP_PKT_PROCESS, + "%u:%s - Error in processing EVPN type-1 NLRI size %d", + peer->bgp->vrf_id, peer->host, psize); + return BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE; + } + break; + case BGP_EVPN_IP_PREFIX_ROUTE: if (process_type5_route(peer, afi, safi, withdraw ? 
NULL : attr, pnt, @@ -4638,7 +4761,7 @@ void bgp_evpn_derive_auto_rd_for_vrf(struct bgp *bgp) */ void bgp_evpn_derive_auto_rd(struct bgp *bgp, struct bgpevpn *vpn) { - char buf[100]; + char buf[BGP_EVPN_PREFIX_RD_LEN]; vpn->prd.family = AF_UNSPEC; vpn->prd.prefixlen = 64; @@ -4722,6 +4845,8 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, /* add to l2vni list on corresponding vrf */ bgpevpn_link_to_l3vni(vpn); + bgp_evpn_vni_es_init(vpn); + QOBJ_REG(vpn, bgpevpn); return vpn; } @@ -4734,6 +4859,7 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, */ void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) { + bgp_evpn_vni_es_cleanup(vpn); bgpevpn_unlink_from_l3vni(vpn); bgp_table_unlock(vpn->route_table); bgp_evpn_unmap_vni_from_its_rts(bgp, vpn); @@ -4866,7 +4992,7 @@ int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni, struct ethaddr *mac, * Handle add of a local MACIP. */ int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac, - struct ipaddr *ip, uint8_t flags, uint32_t seq) + struct ipaddr *ip, uint8_t flags, uint32_t seq, esi_t *esi) { struct bgpevpn *vpn; struct prefix_evpn p; @@ -4882,7 +5008,7 @@ int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac, /* Create EVPN type-2 route and schedule for processing. 
*/ build_evpn_type2_prefix(&p, mac, ip); - if (update_evpn_route(bgp, vpn, &p, flags, seq)) { + if (update_evpn_route(bgp, vpn, &p, flags, seq, esi)) { char buf[ETHER_ADDR_STRLEN]; char buf2[INET6_ADDRSTRLEN]; @@ -5254,7 +5380,7 @@ int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni, if (bgp_evpn_vni_flood_mode_get(bgp, vpn) == VXLAN_FLOOD_HEAD_END_REPL) { build_evpn_type3_prefix(&p, vpn->originator_ip); - if (update_evpn_route(bgp, vpn, &p, 0, 0)) { + if (update_evpn_route(bgp, vpn, &p, 0, 0, NULL)) { flog_err(EC_BGP_EVPN_ROUTE_CREATE, "%u: Type3 route creation failure for VNI %u", bgp->vrf_id, vni); @@ -5327,9 +5453,6 @@ void bgp_evpn_cleanup(struct bgp *bgp) hash_free(bgp->vnihash); bgp->vnihash = NULL; - if (bgp->esihash) - hash_free(bgp->esihash); - bgp->esihash = NULL; list_delete(&bgp->vrf_import_rtl); list_delete(&bgp->vrf_export_rtl); @@ -5346,9 +5469,6 @@ void bgp_evpn_init(struct bgp *bgp) { bgp->vnihash = hash_create(vni_hash_key_make, vni_hash_cmp, "BGP VNI Hash"); - bgp->esihash = - hash_create(esi_hash_keymake, esi_cmp, - "BGP EVPN Local ESI Hash"); bgp->import_rt_hash = hash_create(import_rt_hash_key_make, import_rt_hash_cmp, "BGP Import RT Hash"); diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index cb87f8942..8535f1fa3 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -176,7 +176,7 @@ extern int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni, int state); extern int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac, struct ipaddr *ip, - uint8_t flags, uint32_t seq); + uint8_t flags, uint32_t seq, esi_t *esi); extern int bgp_evpn_local_l3vni_add(vni_t vni, vrf_id_t vrf_id, struct ethaddr *rmac, struct ethaddr *vrr_rmac, diff --git a/bgpd/bgp_evpn_mh.c b/bgpd/bgp_evpn_mh.c index 341b6e8e6..6e6dda7aa 100644 --- a/bgpd/bgp_evpn_mh.c +++ b/bgpd/bgp_evpn_mh.c @@ -1,6 +1,7 @@ /* EVPN Multihoming procedures * * Copyright (C) 2019 Cumulus Networks, Inc. + * Anuradha Karuppiah * * This file is part of FRR. 
* @@ -44,159 +45,45 @@ #include "bgpd/bgp_aspath.h" #include "bgpd/bgp_zebra.h" #include "bgpd/bgp_addpath.h" - -/* compare two IPV4 VTEP IPs */ -static int evpn_vtep_ip_cmp(void *p1, void *p2) -{ - const struct in_addr *ip1 = p1; - const struct in_addr *ip2 = p2; - - return ip1->s_addr - ip2->s_addr; -} - -/* - * Make hash key for ESI. - */ -unsigned int esi_hash_keymake(const void *p) -{ - const struct evpnes *pes = p; - const void *pnt = (void *)pes->esi.val; - - return jhash(pnt, ESI_BYTES, 0xa5a5a55a); -} - -/* - * Compare two ESIs. - */ -bool esi_cmp(const void *p1, const void *p2) -{ - const struct evpnes *pes1 = p1; - const struct evpnes *pes2 = p2; - - if (pes1 == NULL && pes2 == NULL) - return true; - - if (pes1 == NULL || pes2 == NULL) - return false; - - return (memcmp(pes1->esi.val, pes2->esi.val, ESI_BYTES) == 0); -} - -/* - * Build extended community for EVPN ES (type-4) route - */ -static void build_evpn_type4_route_extcomm(struct evpnes *es, - struct attr *attr) -{ - struct ecommunity ecom_encap; - struct ecommunity ecom_es_rt; - struct ecommunity_val eval; - struct ecommunity_val eval_es_rt; - bgp_encap_types tnl_type; - struct ethaddr mac; - - /* Encap */ - tnl_type = BGP_ENCAP_TYPE_VXLAN; - memset(&ecom_encap, 0, sizeof(ecom_encap)); - encode_encap_extcomm(tnl_type, &eval); - ecom_encap.size = 1; - ecom_encap.val = (uint8_t *)eval.val; - attr->ecommunity = ecommunity_dup(&ecom_encap); - - /* ES import RT */ - memset(&mac, 0, sizeof(struct ethaddr)); - memset(&ecom_es_rt, 0, sizeof(ecom_es_rt)); - es_get_system_mac(&es->esi, &mac); - encode_es_rt_extcomm(&eval_es_rt, &mac); - ecom_es_rt.size = 1; - ecom_es_rt.val = (uint8_t *)eval_es_rt.val; - attr->ecommunity = - ecommunity_merge(attr->ecommunity, &ecom_es_rt); - - attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); -} - -static struct in_addr *es_vtep_new(struct in_addr vtep) -{ - struct in_addr *ip; - - ip = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(struct in_addr)); - - ip->s_addr = 
vtep.s_addr; - return ip; -} - -static void es_vtep_free(struct in_addr *ip) -{ - XFREE(MTYPE_BGP_EVPN_ES_VTEP, ip); -} - -/* check if VTEP is already part of the list */ -static int is_vtep_present_in_list(struct list *list, - struct in_addr vtep) -{ - struct listnode *node = NULL; - struct in_addr *tmp; - - for (ALL_LIST_ELEMENTS_RO(list, node, tmp)) { - if (tmp->s_addr == vtep.s_addr) - return 1; - } - return 0; -} - -/* - * Best path for ES route was changed, - * update the list of VTEPs for this ES - */ -static int evpn_es_install_vtep(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p, - struct in_addr rvtep) -{ - struct in_addr *vtep_ip; - - if (is_vtep_present_in_list(es->vtep_list, rvtep)) - return 0; - - - vtep_ip = es_vtep_new(rvtep); - if (vtep_ip) - listnode_add_sort(es->vtep_list, vtep_ip); - return 0; -} - -/* - * Best path for ES route was changed, - * update the list of VTEPs for this ES +#include "bgpd/bgp_label.h" + +static void bgp_evpn_local_es_down(struct bgp *bgp, + struct bgp_evpn_es *es); +static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp, + struct bgp_evpn_es *es); +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr); +static void bgp_evpn_es_vtep_del(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr); +static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es); +static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es); +static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi); + +esi_t zero_esi_buf, *zero_esi = &zero_esi_buf; + +/****************************************************************************** + * per-ES (Ethernet Segment) routing table + * + * Following routes are added to the ES's routing table - + * 1. Local and remote ESR (Type-4) + * 2. Local EAD-per-ES (Type-1). 
+ * + * Key for these routes is {ESI, VTEP-IP} so the path selection is practically + * a no-op i.e. all paths lead to same VTEP-IP (i.e. result in the same VTEP + * being added to same ES). + * + * Note the following routes go into the VNI routing table (instead of the + * ES routing table) - + * 1. Remote EAD-per-ES + * 2. Local and remote EAD-per-EVI */ -static int evpn_es_uninstall_vtep(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p, - struct in_addr rvtep) -{ - struct listnode *node, *nnode, *node_to_del = NULL; - struct in_addr *tmp; - - for (ALL_LIST_ELEMENTS(es->vtep_list, node, nnode, tmp)) { - if (tmp->s_addr == rvtep.s_addr) { - es_vtep_free(tmp); - node_to_del = node; - } - } - - if (node_to_del) - list_delete_node(es->vtep_list, node_to_del); - - return 0; -} -/* - * Calculate the best path for a ES(type-4) route. +/* Calculate the best path for a multi-homing (Type-1 or Type-4) route + * installed in the ES's routing table. */ -static int evpn_es_route_select_install(struct bgp *bgp, - struct evpnes *es, - struct bgp_node *rn) +static int bgp_evpn_es_route_select_install(struct bgp *bgp, + struct bgp_evpn_es *es, + struct bgp_node *rn) { int ret = 0; afi_t afi = AFI_L2VPN; @@ -207,7 +94,7 @@ static int evpn_es_route_select_install(struct bgp *bgp, /* Compute the best path. 
*/ bgp_best_selection(bgp, rn, &bgp->maxpaths[afi][safi], - &old_and_new, afi, safi); + &old_and_new, afi, safi); old_select = old_and_new.old; new_select = old_and_new.new; @@ -216,15 +103,15 @@ static int evpn_es_route_select_install(struct bgp *bgp, * updated */ if (old_select && old_select == new_select - && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_IMPORTED - && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR) - && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED) - && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) { + && old_select->type == ZEBRA_ROUTE_BGP + && old_select->sub_type == BGP_ROUTE_IMPORTED + && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR) + && !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED) + && !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) { if (bgp_zebra_has_route_changed(old_select)) { - ret = evpn_es_install_vtep(bgp, es, - (struct prefix_evpn *)&rn->p, - old_select->attr->nexthop); + bgp_evpn_es_vtep_add(bgp, es, + old_select->attr->nexthop, + true /*esr*/); } UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG); bgp_zebra_clear_route_change_flags(rn); @@ -234,8 +121,7 @@ static int evpn_es_route_select_install(struct bgp *bgp, /* If the user did a "clear" this flag will be set */ UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR); - /* - * bestpath has changed; update relevant fields and install or uninstall + /* bestpath has changed; update relevant fields and install or uninstall * into the zebra RIB. 
*/ if (old_select || new_select) @@ -250,16 +136,15 @@ static int evpn_es_route_select_install(struct bgp *bgp, } if (new_select && new_select->type == ZEBRA_ROUTE_BGP - && new_select->sub_type == BGP_ROUTE_IMPORTED) { - ret = evpn_es_install_vtep(bgp, es, - (struct prefix_evpn *)&rn->p, - new_select->attr->nexthop); + && new_select->sub_type == BGP_ROUTE_IMPORTED) { + bgp_evpn_es_vtep_add(bgp, es, + new_select->attr->nexthop, true /*esr */); } else { if (old_select && old_select->type == ZEBRA_ROUTE_BGP - && old_select->sub_type == BGP_ROUTE_IMPORTED) - ret = evpn_es_uninstall_vtep( - bgp, es, (struct prefix_evpn *)&rn->p, - old_select->attr->nexthop); + && old_select->sub_type == BGP_ROUTE_IMPORTED) + bgp_evpn_es_vtep_del( + bgp, es, old_select->attr->nexthop, + true /*esr*/); } /* Clear any route change flags. */ @@ -272,53 +157,214 @@ static int evpn_es_route_select_install(struct bgp *bgp, return ret; } -/* - * create or update EVPN type4 route entry. - * This could be in the ES table or the global table. - * TODO: handle remote ES (type4) routes as well +/* Install Type-1/Type-4 route entry in the per-ES routing table */ +static int bgp_evpn_es_route_install(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p, + struct bgp_path_info *parent_pi) +{ + int ret = 0; + struct bgp_node *rn = NULL; + struct bgp_path_info *pi = NULL; + struct attr *attr_new = NULL; + + /* Create (or fetch) route within the VNI. + * NOTE: There is no RD here. + */ + rn = bgp_node_get(es->route_table, (struct prefix *)p); + + /* Check if route entry is already present. */ + for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) + if (pi->extra + && (struct bgp_path_info *)pi->extra->parent == + parent_pi) + break; + + if (!pi) { + /* Add (or update) attribute to hash. */ + attr_new = bgp_attr_intern(parent_pi->attr); + + /* Create new route with its attribute. 
*/ + pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0, + parent_pi->peer, attr_new, rn); + SET_FLAG(pi->flags, BGP_PATH_VALID); + bgp_path_info_extra_get(pi); + pi->extra->parent = bgp_path_info_lock(parent_pi); + bgp_dest_lock_node((struct bgp_node *)parent_pi->net); + bgp_path_info_add(rn, pi); + } else { + if (attrhash_cmp(pi->attr, parent_pi->attr) + && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { + bgp_dest_unlock_node(rn); + return 0; + } + /* The attribute has changed. */ + /* Add (or update) attribute to hash. */ + attr_new = bgp_attr_intern(parent_pi->attr); + + /* Restore route, if needed. */ + if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) + bgp_path_info_restore(rn, pi); + + /* Mark if nexthop has changed. */ + if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop)) + SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED); + + /* Unintern existing, set to new. */ + bgp_attr_unintern(&pi->attr); + pi->attr = attr_new; + pi->uptime = bgp_clock(); + } + + /* Perform route selection and update zebra, if required. */ + ret = bgp_evpn_es_route_select_install(bgp, es, rn); + + bgp_dest_unlock_node(rn); + + return ret; +} + +/* Uninstall Type-1/Type-4 route entry from the ES routing table */ +static int bgp_evpn_es_route_uninstall(struct bgp *bgp, struct bgp_evpn_es *es, + struct prefix_evpn *p, struct bgp_path_info *parent_pi) +{ + int ret; + struct bgp_node *rn; + struct bgp_path_info *pi; + + if (!es->route_table) + return 0; + + /* Locate route within the ESI. + * NOTE: There is no RD here. + */ + rn = bgp_node_lookup(es->route_table, (struct prefix *)p); + if (!rn) + return 0; + + /* Find matching route entry. */ + for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) + if (pi->extra + && (struct bgp_path_info *)pi->extra->parent == + parent_pi) + break; + + if (!pi) + return 0; + + /* Mark entry for deletion */ + bgp_path_info_delete(rn, pi); + + /* Perform route selection and update zebra, if required. 
*/ + ret = bgp_evpn_es_route_select_install(bgp, es, rn); + + /* Unlock route node. */ + bgp_dest_unlock_node(rn); + + return ret; +} + +/* Install or uninstall a Type-4 route in the per-ES routing table */ +int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, struct bgp_evpn_es *es, + afi_t afi, safi_t safi, struct prefix_evpn *evp, + struct bgp_path_info *pi, int install) +{ + int ret = 0; + + if (install) + ret = bgp_evpn_es_route_install(bgp, es, evp, pi); + else + ret = bgp_evpn_es_route_uninstall(bgp, es, evp, pi); + + if (ret) { + flog_err( + EC_BGP_EVPN_FAIL, + "%u: Failed to %s EVPN %s route in ESI %s", + bgp->vrf_id, + install ? "install" : "uninstall", + "ES", es->esi_str); + return ret; + } + return 0; +} + +/* Delete (and withdraw) local routes for specified ES from global and ES table. + * Also remove all remote routes from the per ES table. Invoked when ES + * is deleted. */ -static int update_evpn_type4_route_entry(struct bgp *bgp, struct evpnes *es, - afi_t afi, safi_t safi, - struct bgp_node *rn, struct attr *attr, - int add, struct bgp_path_info **ri, - int *route_changed) +static void bgp_evpn_es_route_del_all(struct bgp *bgp, struct bgp_evpn_es *es) +{ + struct bgp_node *rn; + struct bgp_path_info *pi, *nextpi; + + /* de-activate the ES */ + bgp_evpn_local_es_down(bgp, es); + bgp_evpn_local_type1_evi_route_del(bgp, es); + + /* Walk this ES's routing table and delete all routes. */ + for (rn = bgp_table_top(es->route_table); rn; + rn = bgp_route_next(rn)) { + for (pi = bgp_dest_get_bgp_path_info(rn); + (pi != NULL) && (nextpi = pi->next, 1); + pi = nextpi) { + bgp_path_info_delete(rn, pi); + bgp_path_info_reap(rn, pi); + } + } +} + +/***************************************************************************** + * Base APIs for creating MH routes (Type-1 or Type-4) on local ethernet + * segment updates. + */ + +/* create or update local EVPN type1/type4 route entry. 
+ * + * This could be in - + * the ES table if ESR/EAD-ES (or) + * the VNI table if EAD-EVI (or) + * the global table if ESR/EAD-ES/EAD-EVI + * + * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and + * ESR). + */ +static int bgp_evpn_mh_route_update(struct bgp *bgp, + struct bgp_evpn_es *es, struct bgpevpn *vpn, afi_t afi, + safi_t safi, struct bgp_node *rn, struct attr *attr, + int add, struct bgp_path_info **ri, int *route_changed) { - char buf[ESI_STR_LEN]; - char buf1[INET6_ADDRSTRLEN]; struct bgp_path_info *tmp_pi = NULL; struct bgp_path_info *local_pi = NULL; /* local route entry if any */ struct bgp_path_info *remote_pi = NULL; /* remote route entry if any */ struct attr *attr_new = NULL; - struct prefix_evpn *evp = NULL; + struct prefix_evpn *evp; *ri = NULL; - *route_changed = 1; evp = (struct prefix_evpn *)&rn->p; + *route_changed = 1; /* locate the local and remote entries if any */ for (tmp_pi = bgp_dest_get_bgp_path_info(rn); tmp_pi; tmp_pi = tmp_pi->next) { if (tmp_pi->peer == bgp->peer_self - && tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_STATIC) + && tmp_pi->type == ZEBRA_ROUTE_BGP + && tmp_pi->sub_type == BGP_ROUTE_STATIC) local_pi = tmp_pi; if (tmp_pi->type == ZEBRA_ROUTE_BGP - && tmp_pi->sub_type == BGP_ROUTE_IMPORTED - && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID)) + && tmp_pi->sub_type == BGP_ROUTE_IMPORTED + && CHECK_FLAG(tmp_pi->flags, BGP_PATH_VALID)) remote_pi = tmp_pi; } - /* we don't expect to see a remote_ri at this point. - * An ES route has esi + vtep_ip as the key, - * We shouldn't see the same route from any other vtep. + /* we don't expect to see a remote_ri at this point as + * an ES route has {esi, vtep_ip} as the key in the ES-rt-table + * in the VNI-rt-table. 
*/ if (remote_pi) { flog_err( - EC_BGP_ES_INVALID, - "%u ERROR: local es route for ESI: %s Vtep %s also learnt from remote", - bgp->vrf_id, - esi_to_str(&evp->prefix.es_addr.esi, buf, sizeof(buf)), - ipaddr2str(&es->originator_ip, buf1, sizeof(buf1))); + EC_BGP_ES_INVALID, + "%u ERROR: local es route for ESI: %s Vtep %s also learnt from remote", + bgp->vrf_id, es->esi_str, + inet_ntoa(es->originator_ip)); return -1; } @@ -333,15 +379,24 @@ static int update_evpn_type4_route_entry(struct bgp *bgp, struct evpnes *es, /* Create new route with its attribute. */ tmp_pi = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0, - bgp->peer_self, attr_new, rn); + bgp->peer_self, attr_new, rn); SET_FLAG(tmp_pi->flags, BGP_PATH_VALID); + if (evp->prefix.route_type == BGP_EVPN_AD_ROUTE) { + bgp_path_info_extra_get(tmp_pi); + tmp_pi->extra->num_labels = 1; + if (vpn) + vni2label(vpn->vni, &tmp_pi->extra->label[0]); + else + tmp_pi->extra->label[0] = 0; + } + /* add the newly created path to the route-node */ bgp_path_info_add(rn, tmp_pi); } else { tmp_pi = local_pi; if (attrhash_cmp(tmp_pi->attr, attr) - && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED)) + && !CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED)) *route_changed = 0; else { /* The attribute has changed. @@ -349,7 +404,7 @@ static int update_evpn_type4_route_entry(struct bgp *bgp, struct evpnes *es, */ attr_new = bgp_attr_intern(attr); bgp_path_info_set_flag(rn, tmp_pi, - BGP_PATH_ATTR_CHANGED); + BGP_PATH_ATTR_CHANGED); /* Restore route, if needed. */ if (CHECK_FLAG(tmp_pi->flags, BGP_PATH_REMOVED)) @@ -362,114 +417,71 @@ static int update_evpn_type4_route_entry(struct bgp *bgp, struct evpnes *es, } } + if (*route_changed) { + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug("local ES %s vni %u route-type %s nexthop %s updated", + es->esi_str, + vpn ? vpn->vni : 0, + evp->prefix.route_type == + BGP_EVPN_ES_ROUTE ? "esr" : + (vpn ? 
"ead-evi" : "ead-es"), + inet_ntoa(attr->mp_nexthop_global_in)); + } + /* Return back the route entry. */ *ri = tmp_pi; return 0; } -/* update evpn es (type-4) route */ -static int update_evpn_type4_route(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p) +/* Delete local EVPN ESR (type-4) and EAD (type-1) route + * + * Note: vpn is applicable only to EAD-EVI routes (NULL for EAD-ES and + * ESR). + */ +static int bgp_evpn_mh_route_delete(struct bgp *bgp, struct bgp_evpn_es *es, + struct bgpevpn *vpn, struct prefix_evpn *p) { - int ret = 0; - int route_changed = 0; - char buf[ESI_STR_LEN]; - char buf1[INET6_ADDRSTRLEN]; afi_t afi = AFI_L2VPN; safi_t safi = SAFI_EVPN; - struct attr attr; - struct attr *attr_new = NULL; - struct bgp_node *rn = NULL; - struct bgp_path_info *pi = NULL; + struct bgp_path_info *pi; + struct bgp_node *rn = NULL; /* rn in esi table */ + struct bgp_node *global_rn = NULL; /* rn in global table */ + struct bgp_table *rt_table; + struct prefix_rd *prd; - memset(&attr, 0, sizeof(struct attr)); + if (vpn) { + rt_table = vpn->route_table; + prd = &vpn->prd; + } else { + rt_table = es->route_table; + prd = &es->prd; + } - /* Build path-attribute for this route. */ - bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); - attr.nexthop = es->originator_ip.ipaddr_v4; - attr.mp_nexthop_global_in = es->originator_ip.ipaddr_v4; - attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + /* First, locate the route node within the ESI or VNI. + * If it doesn't exist, ther is nothing to do. + * Note: there is no RD here. + */ + rn = bgp_node_lookup(rt_table, (struct prefix *)p); + if (!rn) + return 0; - /* Set up extended community. */ - build_evpn_type4_route_extcomm(es, &attr); + if (BGP_DEBUG(evpn_mh, EVPN_MH_RT)) + zlog_debug("local ES %s vni %u route-type %s nexthop %s delete", + es->esi_str, + vpn ? vpn->vni : 0, + p->prefix.route_type == BGP_EVPN_ES_ROUTE ? + "esr" : (vpn ? 
"ead-evi" : "ead-es"), + inet_ntoa(es->originator_ip)); - /* First, create (or fetch) route node within the ESI. */ - /* NOTE: There is no RD here. */ - rn = bgp_node_get(es->route_table, (struct prefix *)p); + /* Next, locate route node in the global EVPN routing table. + * Note that this table is a 2-level tree (RD-level + Prefix-level) + */ + global_rn = bgp_global_evpn_node_lookup(bgp->rib[afi][safi], afi, safi, + (const struct prefix_evpn *)p, prd); + if (global_rn) { - /* Create or update route entry. */ - ret = update_evpn_type4_route_entry(bgp, es, afi, safi, rn, &attr, 1, - &pi, &route_changed); - if (ret != 0) { - flog_err(EC_BGP_ES_INVALID, - "%u ERROR: Failed to updated ES route ESI: %s VTEP %s", - bgp->vrf_id, - esi_to_str(&p->prefix.es_addr.esi, buf, sizeof(buf)), - ipaddr2str(&es->originator_ip, buf1, sizeof(buf1))); - } - - assert(pi); - attr_new = pi->attr; - - /* Perform route selection; - * this is just to set the flags correctly - * as local route in the ES always wins. - */ - evpn_es_route_select_install(bgp, es, rn); - bgp_dest_unlock_node(rn); - - /* If this is a new route or some attribute has changed, export the - * route to the global table. The route will be advertised to peers - * from there. Note that this table is a 2-level tree (RD-level + - * Prefix-level) similar to L3VPN routes. - */ - if (route_changed) { - struct bgp_path_info *global_pi; - - rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &es->prd); - update_evpn_type4_route_entry(bgp, es, afi, safi, rn, attr_new, - 1, &global_pi, &route_changed); - - /* Schedule for processing and unlock node. */ - bgp_process(bgp, rn, afi, safi); - bgp_dest_unlock_node(rn); - } - - /* Unintern temporary. 
*/ - aspath_unintern(&attr.aspath); - return 0; -} - -/* Delete EVPN ES (type-4) route */ -static int delete_evpn_type4_route(struct bgp *bgp, - struct evpnes *es, - struct prefix_evpn *p) -{ - afi_t afi = AFI_L2VPN; - safi_t safi = SAFI_EVPN; - struct bgp_path_info *pi; - struct bgp_node *rn = NULL; /* rn in esi table */ - struct bgp_node *global_rn = NULL; /* rn in global table */ - - /* First, locate the route node within the ESI. - * If it doesn't exist, ther is nothing to do. - * Note: there is no RD here. - */ - rn = bgp_node_lookup(es->route_table, (struct prefix *)p); - if (!rn) - return 0; - - /* Next, locate route node in the global EVPN routing table. - * Note that this table is a 2-level tree (RD-level + Prefix-level) - */ - global_rn = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi, - (struct prefix *)p, &es->prd); - if (global_rn) { - - /* Delete route entry in the global EVPN table. */ - delete_evpn_route_entry(bgp, afi, safi, global_rn, &pi); + /* Delete route entry in the global EVPN table. */ + delete_evpn_route_entry(bgp, afi, safi, global_rn, &pi); /* Schedule for processing - withdraws to peers happen from * this table. @@ -480,7 +492,7 @@ static int delete_evpn_type4_route(struct bgp *bgp, } /* - * Delete route entry in the ESI route table. + * Delete route entry in the ESI or VNI routing table. * This can just be removed. */ delete_evpn_route_entry(bgp, afi, safi, rn, &pi); @@ -490,176 +502,197 @@ static int delete_evpn_type4_route(struct bgp *bgp, return 0; } -/* - * Delete all routes in per ES route-table +/***************************************************************************** + * Ethernet Segment (Type-4) Routes + * ESRs are used for BUM handling. XXX - BUM support is planned for phase-2 i.e. 
+ * this code is just a place holder for now */ -static int delete_all_es_routes(struct bgp *bgp, struct evpnes *es) +/* Build extended community for EVPN ES (type-4) route */ +static void bgp_evpn_type4_route_extcomm_build(struct bgp_evpn_es *es, + struct attr *attr) { - struct bgp_node *rn; - struct bgp_path_info *pi, *nextpi; - - /* Walk this ES's route table and delete all routes. */ - for (rn = bgp_table_top(es->route_table); rn; - rn = bgp_route_next(rn)) { - for (pi = bgp_dest_get_bgp_path_info(rn); - (pi != NULL) && (nextpi = pi->next, 1); pi = nextpi) { - bgp_path_info_delete(rn, pi); - bgp_path_info_reap(rn, pi); - } - } - - return 0; -} + struct ecommunity ecom_encap; + struct ecommunity ecom_es_rt; + struct ecommunity_val eval; + struct ecommunity_val eval_es_rt; + bgp_encap_types tnl_type; + struct ethaddr mac; -/* Delete (and withdraw) local routes for specified ES from global and ES table. - * Also remove all other routes from the per ES table. - * Invoked when ES is deleted. 
- */ -static int delete_routes_for_es(struct bgp *bgp, struct evpnes *es) -{ - int ret; - char buf[ESI_STR_LEN]; - struct prefix_evpn p; + /* Encap */ + tnl_type = BGP_ENCAP_TYPE_VXLAN; + memset(&ecom_encap, 0, sizeof(ecom_encap)); + encode_encap_extcomm(tnl_type, &eval); + ecom_encap.size = 1; + ecom_encap.val = (uint8_t *)eval.val; + attr->ecommunity = ecommunity_dup(&ecom_encap); - /* Delete and withdraw locally learnt ES route */ - build_evpn_type4_prefix(&p, &es->esi, es->originator_ip.ipaddr_v4); - ret = delete_evpn_type4_route(bgp, es, &p); - if (ret) { - flog_err(EC_BGP_EVPN_ROUTE_DELETE, - "%u failed to delete type-4 route for ESI %s", - bgp->vrf_id, esi_to_str(&es->esi, buf, sizeof(buf))); - } + /* ES import RT */ + memset(&mac, 0, sizeof(struct ethaddr)); + memset(&ecom_es_rt, 0, sizeof(ecom_es_rt)); + es_get_system_mac(&es->esi, &mac); + encode_es_rt_extcomm(&eval_es_rt, &mac); + ecom_es_rt.size = 1; + ecom_es_rt.val = (uint8_t *)eval_es_rt.val; + attr->ecommunity = + ecommunity_merge(attr->ecommunity, &ecom_es_rt); - /* Delete all routes from per ES table */ - return delete_all_es_routes(bgp, es); + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); } -/* Install EVPN route entry in ES */ -static int install_evpn_route_entry_in_es(struct bgp *bgp, struct evpnes *es, - struct prefix_evpn *p, - struct bgp_path_info *parent_pi) +/* Create or update local type-4 route */ +static int bgp_evpn_type4_route_update(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p) { int ret = 0; + int route_changed = 0; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct attr attr; + struct attr *attr_new = NULL; struct bgp_node *rn = NULL; struct bgp_path_info *pi = NULL; - struct attr *attr_new = NULL; - /* Create (or fetch) route within the VNI. - * NOTE: There is no RD here. - */ + memset(&attr, 0, sizeof(struct attr)); + + /* Build path-attribute for this route. 
*/ + bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); + attr.nexthop = es->originator_ip; + attr.mp_nexthop_global_in = es->originator_ip; + attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + + /* Set up extended community. */ + bgp_evpn_type4_route_extcomm_build(es, &attr); + + /* First, create (or fetch) route node within the ESI. */ + /* NOTE: There is no RD here. */ rn = bgp_node_get(es->route_table, (struct prefix *)p); - /* Check if route entry is already present. */ - for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) - if (pi->extra - && (struct bgp_path_info *)pi->extra->parent == parent_pi) - break; + /* Create or update route entry. */ + ret = bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, + rn, &attr, 1, &pi, &route_changed); + if (ret != 0) { + flog_err(EC_BGP_ES_INVALID, + "%u ERROR: Failed to updated ES route ESI: %s VTEP %s", + bgp->vrf_id, es->esi_str, + inet_ntoa(es->originator_ip)); + } - if (!pi) { - /* Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(parent_pi->attr); + assert(pi); + attr_new = pi->attr; - /* Create new route with its attribute. */ - pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0, - parent_pi->peer, attr_new, rn); - SET_FLAG(pi->flags, BGP_PATH_VALID); - bgp_path_info_extra_get(pi); - pi->extra->parent = parent_pi; - bgp_path_info_add(rn, pi); - } else { - if (attrhash_cmp(pi->attr, parent_pi->attr) - && !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) { - bgp_dest_unlock_node(rn); - return 0; - } - /* The attribute has changed. */ - /* Add (or update) attribute to hash. */ - attr_new = bgp_attr_intern(parent_pi->attr); + /* Perform route selection; + * this is just to set the flags correctly + * as local route in the ES always wins. + */ + bgp_evpn_es_route_select_install(bgp, es, rn); + bgp_dest_unlock_node(rn); - /* Restore route, if needed. 
*/ - if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) - bgp_path_info_restore(rn, pi); + /* If this is a new route or some attribute has changed, export the + * route to the global table. The route will be advertised to peers + * from there. Note that this table is a 2-level tree (RD-level + + * Prefix-level) similar to L3VPN routes. + */ + if (route_changed) { + struct bgp_path_info *global_pi; - /* Mark if nexthop has changed. */ - if (!IPV4_ADDR_SAME(&pi->attr->nexthop, &attr_new->nexthop)) - SET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED); + rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + p, &es->prd); + bgp_evpn_mh_route_update(bgp, es, NULL, afi, safi, + rn, attr_new, 1, &global_pi, &route_changed); - /* Unintern existing, set to new. */ - bgp_attr_unintern(&pi->attr); - pi->attr = attr_new; - pi->uptime = bgp_clock(); + /* Schedule for processing and unlock node. */ + bgp_process(bgp, rn, afi, safi); + bgp_dest_unlock_node(rn); } - /* Perform route selection and update zebra, if required. */ - ret = evpn_es_route_select_install(bgp, es, rn); - return ret; + /* Unintern temporary. 
*/ + aspath_unintern(&attr.aspath); + return 0; } -/* Uninstall EVPN route entry from ES route table */ -static int uninstall_evpn_route_entry_in_es(struct bgp *bgp, struct evpnes *es, - struct prefix_evpn *p, - struct bgp_path_info *parent_pi) +/* Delete local type-4 route */ +static int bgp_evpn_type4_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p) { - int ret; - struct bgp_node *rn; - struct bgp_path_info *pi; + return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p); +} - if (!es->route_table) - return 0; +/* Process remote/received EVPN type-4 route (advertise or withdraw) */ +int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id) +{ + int ret; + esi_t esi; + uint8_t ipaddr_len; + struct in_addr vtep_ip; + struct prefix_rd prd; + struct prefix_evpn p; - /* Locate route within the ESI. - * NOTE: There is no RD here. + /* Type-4 route should be either 23 or 35 bytes + * RD (8), ESI (10), ip-len (1), ip (4 or 16) */ - rn = bgp_node_lookup(es->route_table, (struct prefix *)p); - if (!rn) - return 0; - - /* Find matching route entry. */ - for (pi = bgp_dest_get_bgp_path_info(rn); pi; pi = pi->next) - if (pi->extra - && (struct bgp_path_info *)pi->extra->parent == parent_pi) - break; + if (psize != BGP_EVPN_TYPE4_V4_PSIZE && + psize != BGP_EVPN_TYPE4_V6_PSIZE) { + flog_err(EC_BGP_EVPN_ROUTE_INVALID, + "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d", + peer->bgp->vrf_id, peer->host, psize); + return -1; + } - if (!pi) - return 0; + /* Make prefix_rd */ + prd.family = AF_UNSPEC; + prd.prefixlen = 64; + memcpy(&prd.val, pfx, RD_BYTES); + pfx += RD_BYTES; - /* Mark entry for deletion */ - bgp_path_info_delete(rn, pi); + /* get the ESI */ + memcpy(&esi, pfx, ESI_BYTES); + pfx += ESI_BYTES; - /* Perform route selection and update zebra, if required. */ - ret = evpn_es_route_select_install(bgp, es, rn); - /* Unlock route node. 
*/ - bgp_dest_unlock_node(rn); + /* Get the IP. */ + ipaddr_len = *pfx++; + if (ipaddr_len == IPV4_MAX_BITLEN) { + memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN); + } else { + flog_err( + EC_BGP_EVPN_ROUTE_INVALID, + "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d", + peer->bgp->vrf_id, peer->host, ipaddr_len); + return -1; + } + build_evpn_type4_prefix(&p, &esi, vtep_ip); + /* Process the route. */ + if (attr) { + ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr, + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, 0, NULL); + } else { + ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr, + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, NULL); + } return ret; } -/* - * Given a prefix, see if it belongs to ES. - */ -static int is_prefix_matching_for_es(struct prefix_evpn *p, - struct evpnes *es) +/* Check if a prefix belongs to the local ES */ +static bool bgp_evpn_type4_prefix_match(struct prefix_evpn *p, + struct bgp_evpn_es *es) { - /* if not an ES route return false */ - if (p->prefix.route_type != BGP_EVPN_ES_ROUTE) - return 0; - - if (memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t)) == 0) - return 1; - - return 0; + return (p->prefix.route_type == BGP_EVPN_ES_ROUTE) && + !memcmp(&p->prefix.es_addr.esi, &es->esi, sizeof(esi_t)); } -static int install_uninstall_routes_for_es(struct bgp *bgp, - struct evpnes *es, - int install) +/* Import remote ESRs on local ethernet segment add */ +static int bgp_evpn_type4_remote_routes_import(struct bgp *bgp, + struct bgp_evpn_es *es, bool install) { int ret; afi_t afi; safi_t safi; char buf[PREFIX_STRLEN]; - char buf1[ESI_STR_LEN]; struct bgp_node *rd_rn, *rn; struct bgp_table *table; struct bgp_path_info *pi; @@ -667,13 +700,11 @@ static int install_uninstall_routes_for_es(struct bgp *bgp, afi = AFI_L2VPN; safi = SAFI_EVPN; - /* - * Walk entire global routing table and evaluate routes which could be - * imported into this VRF. 
Note that we need to loop through all global - * routes to determine which route matches the import rt on vrf + /* Walk entire global routing table and evaluate routes which could be + * imported into this Ethernet Segment. */ for (rd_rn = bgp_table_top(bgp->rib[afi][safi]); rd_rn; - rd_rn = bgp_route_next(rd_rn)) { + rd_rn = bgp_route_next(rd_rn)) { table = bgp_dest_get_bgp_table_info(rd_rn); if (!table) continue; @@ -682,36 +713,35 @@ static int install_uninstall_routes_for_es(struct bgp *bgp, struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p; for (pi = bgp_dest_get_bgp_path_info(rn); pi; - pi = pi->next) { + pi = pi->next) { /* * Consider "valid" remote routes applicable for * this ES. */ if (!(CHECK_FLAG(pi->flags, BGP_PATH_VALID) - && pi->type == ZEBRA_ROUTE_BGP - && pi->sub_type == BGP_ROUTE_NORMAL)) + && pi->type == ZEBRA_ROUTE_BGP + && pi->sub_type == BGP_ROUTE_NORMAL)) continue; - if (!is_prefix_matching_for_es(evp, es)) + if (!bgp_evpn_type4_prefix_match(evp, es)) continue; if (install) - ret = install_evpn_route_entry_in_es( - bgp, es, evp, pi); + ret = bgp_evpn_es_route_install( + bgp, es, evp, pi); else - ret = uninstall_evpn_route_entry_in_es( - bgp, es, evp, pi); + ret = bgp_evpn_es_route_uninstall( + bgp, es, evp, pi); if (ret) { flog_err( - EC_BGP_EVPN_FAIL, - "Failed to %s EVPN %s route in ESI %s", - install ? "install" + EC_BGP_EVPN_FAIL, + "Failed to %s EVPN %s route in ESI %s", + install ? "install" : "uninstall", - prefix2str(evp, buf, - sizeof(buf)), - esi_to_str(&es->esi, buf1, - sizeof(buf1))); + prefix2str(evp, buf, + sizeof(buf)), + es->esi_str); return ret; } } @@ -720,251 +750,2148 @@ static int install_uninstall_routes_for_es(struct bgp *bgp, return 0; } -/* Install any existing remote ES routes applicable for this ES into its routing - * table. This is invoked when ES comes up. 
+/***************************************************************************** + * Ethernet Auto Discovery (EAD/Type-1) route handling + * There are two types of EAD routes - + * 1. EAD-per-ES - Key: {ESI, ET=0xffffffff} + * 2. EAD-per-EVI - Key: {ESI, ET=0} */ -static int install_routes_for_es(struct bgp *bgp, struct evpnes *es) -{ - return install_uninstall_routes_for_es(bgp, es, 1); -} -/* Install or unistall route in ES */ -int install_uninstall_route_in_es(struct bgp *bgp, struct evpnes *es, - afi_t afi, safi_t safi, - struct prefix_evpn *evp, - struct bgp_path_info *pi, int install) +/* Extended communities associated with EAD-per-ES */ +static void bgp_evpn_type1_es_route_extcomm_build(struct bgp_evpn_es *es, + struct attr *attr) { - int ret = 0; - char buf[ESI_STR_LEN]; + struct ecommunity ecom_encap; + struct ecommunity ecom_esi_label; + struct ecommunity_val eval; + struct ecommunity_val eval_esi_label; + bgp_encap_types tnl_type; + struct listnode *evi_node, *rt_node; + struct ecommunity *ecom; + struct bgp_evpn_es_evi *es_evi; - if (install) - ret = install_evpn_route_entry_in_es(bgp, es, evp, pi); - else - ret = uninstall_evpn_route_entry_in_es(bgp, es, evp, pi); + /* Encap */ + tnl_type = BGP_ENCAP_TYPE_VXLAN; + memset(&ecom_encap, 0, sizeof(ecom_encap)); + encode_encap_extcomm(tnl_type, &eval); + ecom_encap.size = 1; + ecom_encap.val = (uint8_t *)eval.val; + attr->ecommunity = ecommunity_dup(&ecom_encap); - if (ret) { - flog_err( - EC_BGP_EVPN_FAIL, - "%u: Failed to %s EVPN %s route in ESI %s", bgp->vrf_id, - install ? 
"install" : "uninstall", "ES", - esi_to_str(&evp->prefix.es_addr.esi, buf, sizeof(buf))); - return ret; - } - return 0; -} + /* ESI label */ + encode_esi_label_extcomm(&eval_esi_label, + false /*single_active*/); + ecom_esi_label.size = 1; + ecom_esi_label.val = (uint8_t *)eval_esi_label.val; + attr->ecommunity = + ecommunity_merge(attr->ecommunity, &ecom_esi_label); + + /* Add export RTs for all L2-VNIs associated with this ES */ + /* XXX - suppress EAD-ES advertisment if there are no EVIs associated + * with it. + */ + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, + evi_node, es_evi)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + continue; + for (ALL_LIST_ELEMENTS_RO(es_evi->vpn->export_rtl, + rt_node, ecom)) + attr->ecommunity = ecommunity_merge(attr->ecommunity, + ecom); + } + + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); +} + +/* Extended communities associated with EAD-per-EVI */ +static void bgp_evpn_type1_evi_route_extcomm_build(struct bgp_evpn_es *es, + struct bgpevpn *vpn, struct attr *attr) +{ + struct ecommunity ecom_encap; + struct ecommunity_val eval; + bgp_encap_types tnl_type; + struct listnode *rt_node; + struct ecommunity *ecom; + + /* Encap */ + tnl_type = BGP_ENCAP_TYPE_VXLAN; + memset(&ecom_encap, 0, sizeof(ecom_encap)); + encode_encap_extcomm(tnl_type, &eval); + ecom_encap.size = 1; + ecom_encap.val = (uint8_t *)eval.val; + attr->ecommunity = ecommunity_dup(&ecom_encap); + + /* Add export RTs for the L2-VNI */ + for (ALL_LIST_ELEMENTS_RO(vpn->export_rtl, rt_node, ecom)) + attr->ecommunity = ecommunity_merge(attr->ecommunity, ecom); + + attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES); +} + +/* Update EVPN EAD (type-1) route - + * vpn - valid for EAD-EVI routes and NULL for EAD-ES routes + */ +static int bgp_evpn_type1_route_update(struct bgp *bgp, + struct bgp_evpn_es *es, struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + int ret = 0; + afi_t afi = AFI_L2VPN; + safi_t safi = SAFI_EVPN; + struct attr attr; + struct 
attr *attr_new = NULL; + struct bgp_node *rn = NULL; + struct bgp_path_info *pi = NULL; + int route_changed = 0; + struct prefix_rd *global_rd; + + memset(&attr, 0, sizeof(struct attr)); + + /* Build path-attribute for this route. */ + bgp_attr_default_set(&attr, BGP_ORIGIN_IGP); + attr.nexthop = es->originator_ip; + attr.mp_nexthop_global_in = es->originator_ip; + attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4; + + if (vpn) { + /* EAD-EVI route update */ + /* MPLS label */ + vni2label(vpn->vni, &(attr.label)); + + /* Set up extended community */ + bgp_evpn_type1_evi_route_extcomm_build(es, vpn, &attr); + + /* First, create (or fetch) route node within the VNI. */ + rn = bgp_node_get(vpn->route_table, (struct prefix *)p); + + /* Create or update route entry. */ + ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, + rn, &attr, 1, &pi, &route_changed); + if (ret != 0) { + flog_err(EC_BGP_ES_INVALID, + "%u Failed to update EAD-EVI route ESI: %s VNI %u VTEP %s", + bgp->vrf_id, es->esi_str, vpn->vni, + inet_ntoa(es->originator_ip)); + } + global_rd = &vpn->prd; + } else { + /* EAD-ES route update */ + /* MPLS label is 0 for EAD-ES route */ + + /* Set up extended community */ + bgp_evpn_type1_es_route_extcomm_build(es, &attr); + + /* First, create (or fetch) route node within the ES. */ + /* NOTE: There is no RD here. */ + /* XXX: fragment ID must be included as a part of the prefix. */ + rn = bgp_node_get(es->route_table, (struct prefix *)p); + + /* Create or update route entry. */ + ret = bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, + rn, &attr, 1, &pi, &route_changed); + if (ret != 0) { + flog_err(EC_BGP_ES_INVALID, + "%u ERROR: Failed to updated EAD-EVI route ESI: %s VTEP %s", + bgp->vrf_id, es->esi_str, + inet_ntoa(es->originator_ip)); + } + global_rd = &es->prd; + } + + + assert(pi); + attr_new = pi->attr; + + /* Perform route selection; + * this is just to set the flags correctly as local route in + * the ES always wins. 
+ */ + evpn_route_select_install(bgp, vpn, rn); + bgp_dest_unlock_node(rn); + + /* If this is a new route or some attribute has changed, export the + * route to the global table. The route will be advertised to peers + * from there. Note that this table is a 2-level tree (RD-level + + * Prefix-level) similar to L3VPN routes. + */ + if (route_changed) { + struct bgp_path_info *global_pi; + + rn = bgp_global_evpn_node_get(bgp->rib[afi][safi], afi, safi, + p, global_rd); + bgp_evpn_mh_route_update(bgp, es, vpn, afi, safi, + rn, attr_new, 1, &global_pi, &route_changed); + + /* Schedule for processing and unlock node. */ + bgp_process(bgp, rn, afi, safi); + bgp_dest_unlock_node(rn); + } + + /* Unintern temporary. */ + aspath_unintern(&attr.aspath); + return 0; +} + +/* Delete local Type-1 route */ +static int bgp_evpn_type1_es_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, struct prefix_evpn *p) +{ + return bgp_evpn_mh_route_delete(bgp, es, NULL /* l2vni */, p); +} + +static int bgp_evpn_type1_evi_route_delete(struct bgp *bgp, + struct bgp_evpn_es *es, struct bgpevpn *vpn, + struct prefix_evpn *p) +{ + return bgp_evpn_mh_route_delete(bgp, es, vpn, p); +} + +/* Generate EAD-EVI for all VNIs */ +static void bgp_evpn_local_type1_evi_route_add(struct bgp *bgp, + struct bgp_evpn_es *es) +{ + struct listnode *evi_node; + struct prefix_evpn p; + struct bgp_evpn_es_evi *es_evi; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) + /* EAD-EVI route add for this ES is already done */ + return; + + SET_FLAG(es->flags, BGP_EVPNES_ADV_EVI); + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + continue; + if (bgp_evpn_type1_route_update(bgp, es, es_evi->vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: Type4 route creation failure for ESI %s", + bgp->vrf_id, es->esi_str); + } +} + +/* + * Withdraw EAD-EVI for all 
VNIs + */ +static void bgp_evpn_local_type1_evi_route_del(struct bgp *bgp, + struct bgp_evpn_es *es) +{ + struct listnode *evi_node; + struct prefix_evpn p; + struct bgp_evpn_es_evi *es_evi; + + /* Delete and withdraw locally learnt EAD-EVI route */ + if (!CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) + /* EAD-EVI route has not been advertised for this ES */ + return; + + UNSET_FLAG(es->flags, BGP_EVPNES_ADV_EVI); + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + for (ALL_LIST_ELEMENTS_RO(es->es_evi_list, evi_node, es_evi)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + continue; + if (bgp_evpn_mh_route_delete(bgp, es, es_evi->vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u: EAD-EVI route deletion failure for ESI %s", + bgp->vrf_id, es->esi_str); + } +} /* - * Process received EVPN type-4 route (advertise or withdraw). + * Process received EVPN type-1 route (advertise or withdraw). */ -int process_type4_route(struct peer *peer, afi_t afi, safi_t safi, - struct attr *attr, uint8_t *pfx, int psize, - uint32_t addpath_id) +int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id) { int ret; + struct prefix_rd prd; esi_t esi; - uint8_t ipaddr_len; + uint32_t eth_tag; + mpls_label_t label; struct in_addr vtep_ip; - struct prefix_rd prd; struct prefix_evpn p; - /* Type-4 route should be either 23 or 35 bytes - * RD (8), ESI (10), ip-len (1), ip (4 or 16) - */ - if (psize != 23 && psize != 35) { + if (psize != BGP_EVPN_TYPE1_PSIZE) { flog_err(EC_BGP_EVPN_ROUTE_INVALID, - "%u:%s - Rx EVPN Type-4 NLRI with invalid length %d", - peer->bgp->vrf_id, peer->host, psize); + "%u:%s - Rx EVPN Type-1 NLRI with invalid length %d", + peer->bgp->vrf_id, peer->host, psize); return -1; } /* Make prefix_rd */ prd.family = AF_UNSPEC; prd.prefixlen = 64; - memcpy(&prd.val, pfx, 8); - pfx += 8; + memcpy(&prd.val, pfx, RD_BYTES); + pfx += RD_BYTES; /* get 
the ESI */ memcpy(&esi, pfx, ESI_BYTES); pfx += ESI_BYTES; + /* Copy Ethernet Tag */ + memcpy(&eth_tag, pfx, EVPN_ETH_TAG_BYTES); + eth_tag = ntohl(eth_tag); + pfx += EVPN_ETH_TAG_BYTES; - /* Get the IP. */ - ipaddr_len = *pfx++; - if (ipaddr_len == IPV4_MAX_BITLEN) { - memcpy(&vtep_ip, pfx, IPV4_MAX_BYTELEN); - } else { - flog_err( - EC_BGP_EVPN_ROUTE_INVALID, - "%u:%s - Rx EVPN Type-4 NLRI with unsupported IP address length %d", - peer->bgp->vrf_id, peer->host, ipaddr_len); - return -1; - } + memcpy(&label, pfx, BGP_LABEL_BYTES); - build_evpn_type4_prefix(&p, &esi, vtep_ip); + /* EAD route prefix doesn't include the nexthop in the global + * table + */ + vtep_ip.s_addr = 0; + build_evpn_type1_prefix(&p, eth_tag, &esi, vtep_ip); /* Process the route. */ if (attr) { ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr, - afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, - &prd, NULL, 0, 0, NULL); + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, 0, NULL); } else { ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr, - afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, - &prd, NULL, 0, NULL); + afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL, + &prd, NULL, 0, NULL); } return ret; } -/* - * Lookup local ES. +/*****************************************************************************/ +/* Ethernet Segment Management + * 1. Ethernet Segment is a collection of links attached to the same + * server (MHD) or switch (MHN) + * 2. An Ethernet Segment can span multiple PEs and is identified by the + * 10-byte ES-ID. + * 3. Local ESs are configured in zebra and sent to BGP + * 4. Remote ESs are created by BGP when one or more ES-EVIs reference it i.e. + * created on first reference and release on last de-reference + * 5. An ES can be both local and remote. In fact most local ESs are expected + * to have an ES peer. + */ + +/* A list of remote VTEPs is maintained for each ES. This list includes - + * 1. VTEPs for which we have imported the ESR i.e. 
ES-peers + * 2. VTEPs that have an "active" ES-EVI VTEP i.e. EAD-per-ES and EAD-per-EVI + * have been imported into one or more VNIs */ -struct evpnes *bgp_evpn_lookup_es(struct bgp *bgp, esi_t *esi) +static int bgp_evpn_es_vtep_cmp(void *p1, void *p2) +{ + const struct bgp_evpn_es_vtep *es_vtep1 = p1; + const struct bgp_evpn_es_vtep *es_vtep2 = p2; + + return es_vtep1->vtep_ip.s_addr - es_vtep2->vtep_ip.s_addr; +} + +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_new(struct bgp_evpn_es *es, + struct in_addr vtep_ip) +{ + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_VTEP, sizeof(*es_vtep)); + + es_vtep->es = es; + es_vtep->vtep_ip.s_addr = vtep_ip.s_addr; + listnode_init(&es_vtep->es_listnode, es_vtep); + listnode_add_sort(es->es_vtep_list, &es_vtep->es_listnode); + + return es_vtep; +} + +static void bgp_evpn_es_vtep_free(struct bgp_evpn_es_vtep *es_vtep) +{ + struct bgp_evpn_es *es = es_vtep->es; + + if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR) || + es_vtep->evi_cnt) + /* as long as there is some reference we can't free it */ + return; + + list_delete_node(es->es_vtep_list, &es_vtep->es_listnode); + XFREE(MTYPE_BGP_EVPN_ES_VTEP, es_vtep); +} + +/* check if VTEP is already part of the list */ +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_find(struct bgp_evpn_es *es, + struct in_addr vtep_ip) +{ + struct listnode *node = NULL; + struct bgp_evpn_es_vtep *es_vtep; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (es_vtep->vtep_ip.s_addr == vtep_ip.s_addr) + return es_vtep; + } + return NULL; +} + +/* Send the remote ES to zebra for NHG programming */ +static int bgp_zebra_send_remote_es_vtep(struct bgp *bgp, + struct bgp_evpn_es_vtep *es_vtep, bool add) +{ + struct bgp_evpn_es *es = es_vtep->es; + struct stream *s; + + /* Check socket. */ + if (!zclient || zclient->sock < 0) + return 0; + + /* Don't try to register if Zebra doesn't know of this instance. 
*/ + if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp)) { + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("No zebra instance, not installing remote es %s", + es->esi_str); + return 0; + } + + s = zclient->obuf; + stream_reset(s); + + zclient_create_header(s, + add ? ZEBRA_REMOTE_ES_VTEP_ADD : ZEBRA_REMOTE_ES_VTEP_DEL, + bgp->vrf_id); + stream_put(s, &es->esi, sizeof(esi_t)); + stream_put_ipv4(s, es_vtep->vtep_ip.s_addr); + + stream_putw_at(s, 0, stream_get_endp(s)); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("Tx %s Remote ESI %s VTEP %s", + add ? "ADD" : "DEL", es->esi_str, + inet_ntoa(es_vtep->vtep_ip)); + + return zclient_send_message(zclient); +} + +static void bgp_evpn_es_vtep_re_eval_active(struct bgp *bgp, + struct bgp_evpn_es_vtep *es_vtep) +{ + bool old_active; + bool new_active; + + old_active = !!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + /* currently we need an active EVI reference to use the VTEP as + * a nexthop. this may change... + */ + if (es_vtep->evi_cnt) + SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + else + UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + + new_active = !!CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE); + + if (old_active == new_active) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %s %s", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), + new_active ? 
"active" : "inactive"); + + /* send remote ES to zebra */ + bgp_zebra_send_remote_es_vtep(bgp, es_vtep, new_active); + + /* queue up the es for background consistency checks */ + bgp_evpn_es_cons_checks_pend_add(es_vtep->es); +} + +static struct bgp_evpn_es_vtep *bgp_evpn_es_vtep_add(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr) +{ + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip); + + if (!es_vtep) + es_vtep = bgp_evpn_es_vtep_new(es, vtep_ip); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %s add %s", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), + esr ? "esr" : "ead"); + + if (esr) + SET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR); + else + ++es_vtep->evi_cnt; + + bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep); + + return es_vtep; +} + +static void bgp_evpn_es_vtep_do_del(struct bgp *bgp, + struct bgp_evpn_es_vtep *es_vtep, bool esr) +{ + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s vtep %s del %s", + es_vtep->es->esi_str, + inet_ntoa(es_vtep->vtep_ip), + esr ? 
"esr" : "ead"); + if (esr) { + UNSET_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ESR); + } else { + if (es_vtep->evi_cnt) + --es_vtep->evi_cnt; + } + + bgp_evpn_es_vtep_re_eval_active(bgp, es_vtep); + bgp_evpn_es_vtep_free(es_vtep); +} + +static void bgp_evpn_es_vtep_del(struct bgp *bgp, + struct bgp_evpn_es *es, struct in_addr vtep_ip, bool esr) +{ + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = bgp_evpn_es_vtep_find(es, vtep_ip); + if (es_vtep) + bgp_evpn_es_vtep_do_del(bgp, es_vtep, esr); +} + +/* compare ES-IDs for the global ES RB tree */ +static int bgp_es_rb_cmp(const struct bgp_evpn_es *es1, + const struct bgp_evpn_es *es2) +{ + return memcmp(&es1->esi, &es2->esi, ESI_BYTES); +} +RB_GENERATE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp); + +struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi) { - struct evpnes *es; - struct evpnes tmp; + struct bgp_evpn_es tmp; - memset(&tmp, 0, sizeof(struct evpnes)); memcpy(&tmp.esi, esi, sizeof(esi_t)); - es = hash_lookup(bgp->esihash, &tmp); - return es; + return RB_FIND(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, &tmp); } -/* - * Create a new local es - invoked upon zebra notification. 
- */ -static struct evpnes *bgp_evpn_es_new(struct bgp *bgp, - esi_t *esi, - struct ipaddr *originator_ip) +static struct bgp_evpn_es *bgp_evpn_es_new(struct bgp *bgp, const esi_t *esi) { - char buf[100]; - struct evpnes *es; + struct bgp_evpn_es *es; if (!bgp) return NULL; - es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct evpnes)); + es = XCALLOC(MTYPE_BGP_EVPN_ES, sizeof(struct bgp_evpn_es)); - /* set the ESI and originator_ip */ + /* set the ESI */ memcpy(&es->esi, esi, sizeof(esi_t)); - memcpy(&es->originator_ip, originator_ip, sizeof(struct ipaddr)); /* Initialise the VTEP list */ - es->vtep_list = list_new(); - es->vtep_list->cmp = evpn_vtep_ip_cmp; + es->es_vtep_list = list_new(); + listset_app_node_mem(es->es_vtep_list); + es->es_vtep_list->cmp = bgp_evpn_es_vtep_cmp; - /* auto derive RD for this es */ - bf_assign_index(bm->rd_idspace, es->rd_id); - es->prd.family = AF_UNSPEC; - es->prd.prefixlen = 64; - sprintf(buf, "%s:%hu", inet_ntoa(bgp->router_id), es->rd_id); - (void)str2prefix_rd(buf, &es->prd); + esi_to_str(&es->esi, es->esi_str, sizeof(es->esi_str)); - /* Initialize the ES route table */ + /* Initialize the ES routing table */ es->route_table = bgp_table_init(bgp, AFI_L2VPN, SAFI_EVPN); - /* Add to hash */ - if (!hash_get(bgp->esihash, es, hash_alloc_intern)) { + /* Add to rb_tree */ + if (RB_INSERT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es)) { + /* duplicate ESI: release what was set up above before + * dropping the entry, otherwise the list and table leak + */ + list_delete(&es->es_vtep_list); + bgp_table_unlock(es->route_table); XFREE(MTYPE_BGP_EVPN_ES, es); return NULL; } - QOBJ_REG(es, evpnes); + /* Initialise the ES-EVI list */ + es->es_evi_list = list_new(); + listset_app_node_mem(es->es_evi_list); + + QOBJ_REG(es, bgp_evpn_es); + return es; } -/* - * Free a given ES - +/* Free a given ES - * This just frees appropriate memory, caller should have taken other * needed actions. 
*/ -static void bgp_evpn_es_free(struct bgp *bgp, struct evpnes *es) +static void bgp_evpn_es_free(struct bgp_evpn_es *es) { - list_delete(&es->vtep_list); + if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) + return; + + /* cleanup resources maintained against the ES */ + list_delete(&es->es_evi_list); + list_delete(&es->es_vtep_list); bgp_table_unlock(es->route_table); - bf_release_index(bm->rd_idspace, es->rd_id); - hash_release(bgp->esihash, es); + + /* remove the entry from various databases */ + RB_REMOVE(bgp_es_rb_head, &bgp_mh_info->es_rb_tree, es); + bgp_evpn_es_cons_checks_pend_del(es); + QOBJ_UNREG(es); XFREE(MTYPE_BGP_EVPN_ES, es); } -/* - * bgp_evpn_local_es_del - */ -int bgp_evpn_local_es_del(struct bgp *bgp, - esi_t *esi, - struct ipaddr *originator_ip) +/* init local info associated with the ES */ +static void bgp_evpn_es_local_info_set(struct bgp *bgp, struct bgp_evpn_es *es) { - char buf[ESI_STR_LEN]; - struct evpnes *es = NULL; + char buf[BGP_EVPN_PREFIX_RD_LEN]; - if (!bgp->esihash) { - flog_err(EC_BGP_ES_CREATE, "%u: ESI hash not yet created", - bgp->vrf_id); - return -1; - } + if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) + return; - /* Lookup ESI hash - should exist. 
*/ - es = bgp_evpn_lookup_es(bgp, esi); - if (!es) { - flog_warn(EC_BGP_EVPN_ESI, - "%u: ESI hash entry for ESI %s at Local ES DEL", - bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); - return -1; - } + SET_FLAG(es->flags, BGP_EVPNES_LOCAL); + listnode_init(&es->es_listnode, es); + listnode_add(bgp_mh_info->local_es_list, &es->es_listnode); - /* Delete all local EVPN ES routes from ESI table - * and schedule for processing (to withdraw from peers)) - */ - delete_routes_for_es(bgp, es); + /* auto derive RD for this es */ + bf_assign_index(bm->rd_idspace, es->rd_id); + es->prd.family = AF_UNSPEC; + es->prd.prefixlen = 64; + sprintf(buf, "%s:%hu", inet_ntoa(bgp->router_id), es->rd_id); + (void)str2prefix_rd(buf, &es->prd); +} + +/* clear any local info associated with the ES */ +static void bgp_evpn_es_local_info_clear(struct bgp_evpn_es *es) +{ + if (!CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) + return; - /* free the hash entry */ - bgp_evpn_es_free(bgp, es); + UNSET_FLAG(es->flags, BGP_EVPNES_LOCAL); - return 0; + /* remove from the ES local list */ + list_delete_node(bgp_mh_info->local_es_list, &es->es_listnode); + + bf_release_index(bm->rd_idspace, es->rd_id); + + bgp_evpn_es_free(es); } -/* - * bgp_evpn_local_es_add - */ -int bgp_evpn_local_es_add(struct bgp *bgp, - esi_t *esi, - struct ipaddr *originator_ip) +/* eval remote info associated with the ES */ +static void bgp_evpn_es_remote_info_re_eval(struct bgp_evpn_es *es) +{ + if (es->remote_es_evi_cnt) { + SET_FLAG(es->flags, BGP_EVPNES_REMOTE); + } else { + if (CHECK_FLAG(es->flags, BGP_EVPNES_REMOTE)) { + UNSET_FLAG(es->flags, BGP_EVPNES_REMOTE); + bgp_evpn_es_free(es); + } + } +} + +/* Process ES link oper-down by withdrawing ES-EAD and ESR */ +static void bgp_evpn_local_es_down(struct bgp *bgp, + struct bgp_evpn_es *es) { - char buf[ESI_STR_LEN]; - struct evpnes *es = NULL; struct prefix_evpn p; + int ret; - if (!bgp->esihash) { - flog_err(EC_BGP_ES_CREATE, "%u: ESI hash not yet created", - bgp->vrf_id); - 
return -1; - } + if (!CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) + return; - /* create the new es */ - es = bgp_evpn_lookup_es(bgp, esi); - if (!es) { - es = bgp_evpn_es_new(bgp, esi, originator_ip); - if (!es) { - flog_err( - EC_BGP_ES_CREATE, - "%u: Failed to allocate ES entry for ESI %s - at Local ES Add", - bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); - return -1; - } - } - UNSET_FLAG(es->flags, EVPNES_REMOTE); - SET_FLAG(es->flags, EVPNES_LOCAL); + UNSET_FLAG(es->flags, BGP_EVPNES_OPER_UP); - build_evpn_type4_prefix(&p, esi, originator_ip->ipaddr_v4); - if (update_evpn_type4_route(bgp, es, &p)) { - flog_err(EC_BGP_EVPN_ROUTE_CREATE, - "%u: Type4 route creation failure for ESI %s", - bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); - return -1; + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("local es %s down", es->esi_str); + + /* withdraw ESR */ + /* Delete and withdraw locally learnt ES route */ + build_evpn_type4_prefix(&p, &es->esi, es->originator_ip); + ret = bgp_evpn_type4_route_delete(bgp, es, &p); + if (ret) { + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u failed to delete type-4 route for ESI %s", + bgp->vrf_id, es->esi_str); } - /* import all remote ES routes in th ES table */ - install_routes_for_es(bgp, es); + /* withdraw EAD-EVI */ + if (!bgp_mh_info->ead_evi_adv_for_down_links) + bgp_evpn_local_type1_evi_route_del(bgp, es); - return 0; -} + /* withdraw EAD-ES */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + ret = bgp_evpn_type1_es_route_delete(bgp, es, &p); + if (ret) { + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u failed to delete type-1 route for ESI %s", + bgp->vrf_id, es->esi_str); + } +} + +/* Process ES link oper-up by generating ES-EAD and ESR */ +static void bgp_evpn_local_es_up(struct bgp *bgp, struct bgp_evpn_es *es) +{ + struct prefix_evpn p; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) + return; + + SET_FLAG(es->flags, BGP_EVPNES_OPER_UP); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + 
zlog_debug("local es %s up", es->esi_str); + + /* generate ESR */ + build_evpn_type4_prefix(&p, &es->esi, es->originator_ip); + if (bgp_evpn_type4_route_update(bgp, es, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: Type4 route creation failure for ESI %s", + bgp->vrf_id, es->esi_str); + + /* generate EAD-EVI */ + bgp_evpn_local_type1_evi_route_add(bgp, es); + + /* generate EAD-ES */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + bgp_evpn_type1_route_update(bgp, es, NULL, &p); +} + +static void bgp_evpn_local_es_do_del(struct bgp *bgp, struct bgp_evpn_es *es) +{ + struct bgp_evpn_es_evi *es_evi; + struct listnode *evi_node, *evi_next_node; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del local es %s", es->esi_str); + + /* Delete all local EVPN ES routes from ESI table + * and schedule for processing (to withdraw from peers)) + */ + bgp_evpn_es_route_del_all(bgp, es); + + /* release all local ES EVIs associated with the ES */ + for (ALL_LIST_ELEMENTS(es->es_evi_list, evi_node, + evi_next_node, es_evi)) { + bgp_evpn_local_es_evi_do_del(es_evi); + } + + /* Clear local info associated with the ES and free it up if there is + * no remote reference + */ + bgp_evpn_es_local_info_clear(es); +} + +/* Delete the local ES identified by esi. Returns 0 on success and -1 if + * no ES with that ESI is found. + */ +int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es = NULL; + + /* Lookup ESI hash - should exist. */ + es = bgp_evpn_es_find(esi); + if (!es) { + /* es is NULL on this path; format the ESI from the lookup + * key instead of dereferencing es + */ + flog_warn(EC_BGP_EVPN_ESI, + "%u: ES %s missing at local ES DEL", + bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + + bgp_evpn_local_es_do_del(bgp, es); + return 0; +} + +/* Handle device to ES id association. Results in the creation of a local + * ES. 
+ */ +int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, + struct in_addr originator_ip, bool oper_up) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + bool new_es = true; + + /* create the new es */ + es = bgp_evpn_es_find(esi); + if (es) { + if (CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL)) + new_es = false; + } else { + es = bgp_evpn_es_new(bgp, esi); + if (!es) { + flog_err(EC_BGP_ES_CREATE, + "%u: Failed to allocate ES entry for ESI %s - at Local ES Add", + bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + } + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add local es %s orig-ip %s", + es->esi_str, + inet_ntoa(originator_ip)); + + es->originator_ip = originator_ip; + bgp_evpn_es_local_info_set(bgp, es); + + /* import all remote Type-4 routes in the ES table */ + if (new_es) + bgp_evpn_type4_remote_routes_import(bgp, es, + true /* install */); + + /* create and advertise EAD-EVI routes for the ES - + * XXX - till an ES-EVI reference is created there is really nothing to + * advertise + */ + if (bgp_mh_info->ead_evi_adv_for_down_links) + bgp_evpn_local_type1_evi_route_add(bgp, es); + + /* If the ES link is operationally up generate EAD-ES. EAD-EVI + * can be generated even if the link is inactive. 
+ */ + if (oper_up) + bgp_evpn_local_es_up(bgp, es); + else + bgp_evpn_local_es_down(bgp, es); + + return 0; +} + +static char *bgp_evpn_es_vteps_str(char *vtep_str, struct bgp_evpn_es *es) +{ + char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ]; + struct listnode *node; + struct bgp_evpn_es_vtep *es_vtep; + bool first = true; + + vtep_str[0] = '\0'; + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + vtep_flag_str[0] = '\0'; + if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) + strcpy(vtep_flag_str + strlen(vtep_flag_str), "E"); + if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE) + strcpy(vtep_flag_str + strlen(vtep_flag_str), "A"); + + if (!strlen(vtep_flag_str)) + strcpy(vtep_flag_str, "-"); + if (first) { + first = false; + sprintf(vtep_str + strlen(vtep_str), "%s(%s)", + inet_ntoa(es_vtep->vtep_ip), + vtep_flag_str); + } else { + sprintf(vtep_str + strlen(vtep_str), ",%s(%s)", + inet_ntoa(es_vtep->vtep_ip), + vtep_flag_str); + } + } + + return vtep_str; +} + +static inline void json_array_string_add(json_object *json, const char *str) +{ + json_object_array_add(json, json_object_new_string(str)); +} + +static void bgp_evpn_es_json_vtep_fill(json_object *json_vteps, + struct bgp_evpn_es_vtep *es_vtep) +{ + json_object *json_vtep_entry; + json_object *json_flags; + + json_vtep_entry = json_object_new_object(); + + json_object_string_add(json_vtep_entry, "vtep_ip", + inet_ntoa(es_vtep->vtep_ip)); + if (es_vtep->flags & (BGP_EVPNES_VTEP_ESR | + BGP_EVPNES_VTEP_ACTIVE)) { + json_flags = json_object_new_array(); + if (es_vtep->flags & BGP_EVPNES_VTEP_ESR) + json_array_string_add(json_flags, "esr"); + if (es_vtep->flags & BGP_EVPNES_VTEP_ACTIVE) + json_array_string_add(json_flags, "active"); + json_object_object_add(json_vtep_entry, "flags", json_flags); + } + + json_object_array_add(json_vteps, + json_vtep_entry); +} + +static void bgp_evpn_es_show_entry(struct vty *vty, + struct bgp_evpn_es *es, json_object *json) +{ + char buf1[RD_ADDRSTRLEN]; + struct listnode *node; + 
struct bgp_evpn_es_vtep *es_vtep; + + if (json) { + json_object *json_vteps; + json_object *json_types; + + json_object_string_add(json, "esi", es->esi_str); + json_object_string_add(json, "rd", + prefix_rd2str(&es->prd, buf1, + sizeof(buf1))); + + if (es->flags & (BGP_EVPNES_LOCAL | BGP_EVPNES_REMOTE)) { + json_types = json_object_new_array(); + if (es->flags & BGP_EVPNES_LOCAL) + json_array_string_add(json_types, "local"); + if (es->flags & BGP_EVPNES_REMOTE) + json_array_string_add(json_types, "remote"); + json_object_object_add(json, "type", json_types); + } + + if (listcount(es->es_vtep_list)) { + json_vteps = json_object_new_array(); + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, + node, es_vtep)) { + bgp_evpn_es_json_vtep_fill(json_vteps, es_vtep); + } + json_object_object_add(json, "vteps", json_vteps); + } + json_object_int_add(json, "vniCount", + listcount(es->es_evi_list)); + } else { + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + + type_str[0] = '\0'; + if (es->flags & BGP_EVPNES_LOCAL) + strcpy(type_str + strlen(type_str), "L"); + if (es->flags & BGP_EVPNES_REMOTE) + strcpy(type_str + strlen(type_str), "R"); + if (es->inconsistencies) + strcpy(type_str + strlen(type_str), "I"); + + bgp_evpn_es_vteps_str(vtep_str, es); + + if (es->flags & BGP_EVPNES_LOCAL) + prefix_rd2str(&es->prd, buf1, sizeof(buf1)); + else + strcpy(buf1, "-"); + + vty_out(vty, "%-30s %-5s %-21s %-8d %s\n", + es->esi_str, type_str, buf1, + listcount(es->es_evi_list), vtep_str); + } +} + +static void bgp_evpn_es_show_entry_detail(struct vty *vty, + struct bgp_evpn_es *es, json_object *json) +{ + if (json) { + json_object *json_flags; + json_object *json_incons; + + /* Add the "brief" info first */ + bgp_evpn_es_show_entry(vty, es, json); + if (es->flags & (BGP_EVPNES_OPER_UP | BGP_EVPNES_ADV_EVI)) { + json_flags = json_object_new_array(); + if (es->flags & BGP_EVPNES_OPER_UP) + json_array_string_add(json_flags, "up"); + if (es->flags & 
BGP_EVPNES_ADV_EVI) + json_array_string_add(json_flags, + "advertiseEVI"); + json_object_object_add(json, "flags", json_flags); + } + json_object_string_add(json, "originator_ip", + inet_ntoa(es->originator_ip)); + json_object_int_add(json, "remoteVniCount", + es->remote_es_evi_cnt); + json_object_int_add(json, "inconsistentVniVtepCount", + es->incons_evi_vtep_cnt); + if (es->inconsistencies) { + json_incons = json_object_new_array(); + if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST) + json_array_string_add(json_incons, + "vni-vtep-mismatch"); + json_object_object_add(json, "inconsistencies", + json_incons); + } + } else { + char incons_str[BGP_EVPNES_INCONS_STR_SZ]; + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + char buf1[RD_ADDRSTRLEN]; + + type_str[0] = '\0'; + if (es->flags & BGP_EVPNES_LOCAL) + strcpy(type_str + strlen(type_str), "L"); + if (es->flags & BGP_EVPNES_REMOTE) + strcpy(type_str + strlen(type_str), "R"); + + bgp_evpn_es_vteps_str(vtep_str, es); + if (!strlen(vtep_str)) + strcpy(vtep_str, "-"); + + if (es->flags & BGP_EVPNES_LOCAL) + prefix_rd2str(&es->prd, buf1, sizeof(buf1)); + else + strcpy(buf1, "-"); + + vty_out(vty, "ESI: %s\n", es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " RD: %s\n", buf1); + vty_out(vty, " Originator-IP: %s\n", + inet_ntoa(es->originator_ip)); + vty_out(vty, " VNI Count: %d\n", listcount(es->es_evi_list)); + vty_out(vty, " Remote VNI Count: %d\n", + es->remote_es_evi_cnt); + vty_out(vty, " Inconsistent VNI VTEP Count: %d\n", + es->incons_evi_vtep_cnt); + if (es->inconsistencies) { + incons_str[0] = '\0'; + if (es->inconsistencies & BGP_EVPNES_INCONS_VTEP_LIST) + strcpy(incons_str + strlen(incons_str), + "vni-vtep-mismatch"); + } else { + strcpy(incons_str, "-"); + } + vty_out(vty, " Inconsistencies: %s\n", + incons_str); + vty_out(vty, " VTEPs: %s\n", vtep_str); + vty_out(vty, "\n"); + } +} + +/* Display all ESs */ +void bgp_evpn_es_show(struct vty 
*vty, bool uj, bool detail) +{ + struct bgp_evpn_es *es; + json_object *json_array; + json_object *json = NULL; + + if (uj) { + /* create an array of ESs */ + json_array = json_object_new_array(); + } else { + if (!detail) { + vty_out(vty, + "ES Flags: L local, R remote, I inconsistent\n"); + vty_out(vty, + "VTEP Flags: E ESR/Type-4, A active nexthop\n"); + vty_out(vty, + "%-30s %-5s %-21s %-8s %s\n", + "ESI", "Flags", "RD", "#VNIs", "VTEPs"); + } + } + + RB_FOREACH(es, bgp_es_rb_head, &bgp_mh_info->es_rb_tree) { + if (uj) + /* create a separate json object for each ES */ + json = json_object_new_object(); + if (detail) + bgp_evpn_es_show_entry_detail(vty, es, json); + else + bgp_evpn_es_show_entry(vty, es, json); + /* add ES to the json array */ + if (uj) + json_object_array_add(json_array, json); + } + + /* print the array of json-ESs */ + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + +/* Display specific ES */ +void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj) +{ + struct bgp_evpn_es *es; + json_object *json = NULL; + + if (uj) + json = json_object_new_object(); + + es = bgp_evpn_es_find(esi); + if (es) { + bgp_evpn_es_show_entry_detail(vty, es, json); + } else { + if (!uj) + vty_out(vty, "ESI not found\n"); + } + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json, JSON_C_TO_STRING_PRETTY)); + json_object_free(json); + } +} + +/*****************************************************************************/ +/* Ethernet Segment to EVI association - + * 1. The ES-EVI entry is maintained as a RB tree per L2-VNI + * (bgpevpn->es_evi_rb_tree). + * 2. Each local ES-EVI entry is rxed from zebra and then used by BGP to + * advertises an EAD-EVI (Type-1 EVPN) route + * 3. The remote ES-EVI is created when a bgp_evpn_es_evi_vtep references + * it. + */ + +/* A list of remote VTEPs is maintained for each ES-EVI. 
This list includes - + * 1. VTEPs for which we have imported the EAD-per-ES Type1 route + * 2. VTEPs for which we have imported the EAD-per-EVI Type1 route + * VTEPs for which both routes have been rxed are activated. Activation + * creates a NHG in the parent ES. + */ +static int bgp_evpn_es_evi_vtep_cmp(void *p1, void *p2) +{ + const struct bgp_evpn_es_evi_vtep *evi_vtep1 = p1; + const struct bgp_evpn_es_evi_vtep *evi_vtep2 = p2; + + return evi_vtep1->vtep_ip.s_addr - evi_vtep2->vtep_ip.s_addr; +} + +static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_new( + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + + evi_vtep = XCALLOC(MTYPE_BGP_EVPN_ES_EVI_VTEP, sizeof(*evi_vtep)); + + evi_vtep->es_evi = es_evi; + evi_vtep->vtep_ip.s_addr = vtep_ip.s_addr; + listnode_init(&evi_vtep->es_evi_listnode, evi_vtep); + listnode_add_sort(es_evi->es_evi_vtep_list, &evi_vtep->es_evi_listnode); + + return evi_vtep; +} + +static void bgp_evpn_es_evi_vtep_free(struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + struct bgp_evpn_es_evi *es_evi = evi_vtep->es_evi; + + if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD)) + /* as long as there is some reference we can't free it */ + return; + + list_delete_node(es_evi->es_evi_vtep_list, &evi_vtep->es_evi_listnode); + XFREE(MTYPE_BGP_EVPN_ES_EVI_VTEP, evi_vtep); +} + +/* check if VTEP is already part of the list */ +static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_vtep_find( + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip) +{ + struct listnode *node = NULL; + struct bgp_evpn_es_evi_vtep *evi_vtep; + + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) { + if (evi_vtep->vtep_ip.s_addr == vtep_ip.s_addr) + return evi_vtep; + } + return NULL; +} + +/* A VTEP can be added as "active" attach to an ES if EAD-per-ES and + * EAD-per-EVI routes are rxed from it. 
+ */ +static void bgp_evpn_es_evi_vtep_re_eval_active(struct bgp *bgp, + struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + bool old_active; + bool new_active; + + old_active = !!CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + /* Both EAD-per-ES and EAD-per-EVI routes must be rxed from a PE + * before it can be activated. + */ + if ((evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD) == + BGP_EVPN_EVI_VTEP_EAD) + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + else + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + new_active = !!CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE); + + if (old_active == new_active) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("es %s evi %u vtep %s %s", + evi_vtep->es_evi->es->esi_str, + evi_vtep->es_evi->vpn->vni, + inet_ntoa(evi_vtep->vtep_ip), + new_active ? "active" : "inactive"); + + /* add VTEP to parent es */ + if (new_active) { + struct bgp_evpn_es_vtep *es_vtep; + + es_vtep = bgp_evpn_es_vtep_add(bgp, evi_vtep->es_evi->es, + evi_vtep->vtep_ip, false /*esr*/); + evi_vtep->es_vtep = es_vtep; + } else { + if (evi_vtep->es_vtep) { + bgp_evpn_es_vtep_do_del(bgp, evi_vtep->es_vtep, + false /*esr*/); + evi_vtep->es_vtep = NULL; + } + } + /* queue up the parent es for background consistency checks */ + bgp_evpn_es_cons_checks_pend_add(evi_vtep->es_evi->es); +} + +static void bgp_evpn_es_evi_vtep_add(struct bgp *bgp, + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip, + bool ead_es) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + + evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip); + + if (!evi_vtep) + evi_vtep = bgp_evpn_es_evi_vtep_new(es_evi, vtep_ip); + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add es %s evi %u vtep %s %s", + evi_vtep->es_evi->es->esi_str, + evi_vtep->es_evi->vpn->vni, + inet_ntoa(evi_vtep->vtep_ip), + ead_es ? 
"ead_es" : "ead_evi"); + + if (ead_es) + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES); + else + SET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + + bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); +} + +static void bgp_evpn_es_evi_vtep_del(struct bgp *bgp, + struct bgp_evpn_es_evi *es_evi, struct in_addr vtep_ip, + bool ead_es) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + + evi_vtep = bgp_evpn_es_evi_vtep_find(es_evi, vtep_ip); + if (!evi_vtep) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del es %s evi %u vtep %s %s", + evi_vtep->es_evi->es->esi_str, + evi_vtep->es_evi->vpn->vni, + inet_ntoa(evi_vtep->vtep_ip), + ead_es ? "ead_es" : "ead_evi"); + + if (ead_es) + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_ES); + else + UNSET_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_EAD_PER_EVI); + + bgp_evpn_es_evi_vtep_re_eval_active(bgp, evi_vtep); + bgp_evpn_es_evi_vtep_free(evi_vtep); +} + +/* compare ES-IDs for the ES-EVI RB tree maintained per-VNI */ +static int bgp_es_evi_rb_cmp(const struct bgp_evpn_es_evi *es_evi1, + const struct bgp_evpn_es_evi *es_evi2) +{ + return memcmp(&es_evi1->es->esi, &es_evi2->es->esi, ESI_BYTES); +} +RB_GENERATE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, bgp_es_evi_rb_cmp); + +/* find the ES-EVI in the per-L2-VNI RB tree */ +static struct bgp_evpn_es_evi *bgp_evpn_es_evi_find(struct bgp_evpn_es *es, + struct bgpevpn *vpn) +{ + struct bgp_evpn_es_evi es_evi; + + es_evi.es = es; + + return RB_FIND(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, &es_evi); +} + +/* allocate a new ES-EVI and insert it into the per-L2-VNI and per-ES + * tables. 
+ */ +static struct bgp_evpn_es_evi *bgp_evpn_es_evi_new(struct bgp_evpn_es *es, + struct bgpevpn *vpn) +{ + struct bgp_evpn_es_evi *es_evi; + + es_evi = XCALLOC(MTYPE_BGP_EVPN_ES_EVI, sizeof(*es_evi)); + + es_evi->es = es; + es_evi->vpn = vpn; + + /* Initialise the VTEP list */ + es_evi->es_evi_vtep_list = list_new(); + listset_app_node_mem(es_evi->es_evi_vtep_list); + es_evi->es_evi_vtep_list->cmp = bgp_evpn_es_evi_vtep_cmp; + + /* insert into the VNI-ESI rb tree */ + if (RB_INSERT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi)) { + XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi); + return NULL; + } + + /* add to the ES's VNI list */ + listnode_init(&es_evi->es_listnode, es_evi); + listnode_add(es->es_evi_list, &es_evi->es_listnode); + + return es_evi; +} + +/* remove the ES-EVI from the per-L2-VNI and per-ES tables and free + * up the memory. + */ +static void bgp_evpn_es_evi_free(struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es *es = es_evi->es; + struct bgpevpn *vpn = es_evi->vpn; + + /* cannot free the element as long as there is a local or remote + * reference + */ + if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | BGP_EVPNES_EVI_REMOTE)) + return; + + /* remove from the ES's VNI list */ + list_delete_node(es->es_evi_list, &es_evi->es_listnode); + + /* remove from the VNI-ESI rb tree */ + RB_REMOVE(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree, es_evi); + + /* free the VTEP list */ + list_delete(&es_evi->es_evi_vtep_list); + + /* remove from the VNI-ESI rb tree */ + XFREE(MTYPE_BGP_EVPN_ES_EVI, es_evi); +} + +/* init local info associated with the ES-EVI */ +static void bgp_evpn_es_evi_local_info_set(struct bgp_evpn_es_evi *es_evi) +{ + struct bgpevpn *vpn = es_evi->vpn; + + if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + return; + + SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL); + listnode_init(&es_evi->l2vni_listnode, es_evi); + listnode_add(vpn->local_es_evi_list, &es_evi->l2vni_listnode); +} + +/* clear any local info associated with the ES-EVI */ +static 
void bgp_evpn_es_evi_local_info_clear(struct bgp_evpn_es_evi *es_evi) +{ + struct bgpevpn *vpn = es_evi->vpn; + + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + return; + + UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL); + list_delete_node(vpn->local_es_evi_list, &es_evi->l2vni_listnode); + + bgp_evpn_es_evi_free(es_evi); +} + +/* eval remote info associated with the ES */ +static void bgp_evpn_es_evi_remote_info_re_eval(struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es *es = es_evi->es; + + /* if there are remote VTEPs the ES-EVI is classified as "remote" */ + if (listcount(es_evi->es_evi_vtep_list)) { + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE)) { + SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE); + ++es->remote_es_evi_cnt; + /* set remote on the parent es */ + bgp_evpn_es_remote_info_re_eval(es); + } + } else { + if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE)) { + UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_REMOTE); + if (es->remote_es_evi_cnt) + --es->remote_es_evi_cnt; + bgp_evpn_es_evi_free(es_evi); + /* check if "remote" can be cleared from the + * parent es. 
+ */ + bgp_evpn_es_remote_info_re_eval(es); + } + } +} + +static void bgp_evpn_local_es_evi_do_del(struct bgp_evpn_es_evi *es_evi) +{ + struct prefix_evpn p; + struct bgp_evpn_es *es = es_evi->es; + struct bgp *bgp; + + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + return; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del local es %s evi %u", + es_evi->es->esi_str, + es_evi->vpn->vni); + + bgp = bgp_get_evpn(); + + if (bgp) { + /* update EAD-ES with new list of VNIs */ + if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) { + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: EAD-ES route update failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, + es_evi->vpn->vni); + } + + /* withdraw and delete EAD-EVI */ + if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + if (bgp_evpn_type1_evi_route_delete(bgp, + es, es_evi->vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_DELETE, + "%u: EAD-EVI route deletion failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, + es_evi->vpn->vni); + } + } + + bgp_evpn_es_evi_local_info_clear(es_evi); +} + +int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni) +{ + struct bgpevpn *vpn; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + char buf[ESI_STR_LEN]; + + es = bgp_evpn_es_find(esi); + if (!es) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to deref VNI %d from ESI %s; ES not present", + bgp->vrf_id, vni, + esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + + vpn = bgp_evpn_lookup_vni(bgp, vni); + if (!vpn) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to deref VNI %d from ESI %s; VNI not present", + bgp->vrf_id, vni, es->esi_str); + return -1; + } + + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to deref VNI %d from ESI 
%s; ES-VNI not present", + bgp->vrf_id, vni, es->esi_str); + return -1; + } + + bgp_evpn_local_es_evi_do_del(es_evi); + return 0; +} + +/* Create ES-EVI and advertise the corresponding EAD routes */ +int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni) +{ + struct bgpevpn *vpn; + struct prefix_evpn p; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + char buf[ESI_STR_LEN]; + + es = bgp_evpn_es_find(esi); + if (!es) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to associate VNI %d with ESI %s; ES not present", + bgp->vrf_id, vni, + esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + + vpn = bgp_evpn_lookup_vni(bgp, vni); + if (!vpn) { + flog_err( + EC_BGP_ES_CREATE, + "%u: Failed to associate VNI %d with ESI %s; VNI not present", + bgp->vrf_id, vni, es->esi_str); + return -1; + } + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add local es %s evi %u", + es->esi_str, vni); + + es_evi = bgp_evpn_es_evi_find(es, vpn); + + if (es_evi) { + if (CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_LOCAL)) + /* dup */ + return 0; + } else { + es_evi = bgp_evpn_es_evi_new(es, vpn); + if (!es_evi) + return -1; + } + + bgp_evpn_es_evi_local_info_set(es_evi); + + /* generate an EAD-EVI for this new VNI */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_EVI_ETH_TAG, + &es->esi, es->originator_ip); + if (CHECK_FLAG(es->flags, BGP_EVPNES_ADV_EVI)) { + if (bgp_evpn_type1_route_update(bgp, es, vpn, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: EAD-EVI route creation failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, vni); + } + + /* update EAD-ES */ + build_evpn_type1_prefix(&p, BGP_EVPN_AD_ES_ETH_TAG, + &es->esi, es->originator_ip); + if (CHECK_FLAG(es->flags, BGP_EVPNES_OPER_UP)) { + if (bgp_evpn_type1_route_update(bgp, es, NULL, &p)) + flog_err(EC_BGP_EVPN_ROUTE_CREATE, + "%u: EAD-ES route creation failure for ESI %s VNI %u", + bgp->vrf_id, es->esi_str, vni); + } + + return 0; +} + +/* Add remote ES-EVI entry. 
This is actually the remote VTEP add and the + * ES-EVI is implicity created on first VTEP's reference. + */ +int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + bool ead_es; + const esi_t *esi = &p->prefix.ead_addr.esi; + + if (!vpn) + /* local EAD-ES need not be sent back to zebra */ + return 0; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("add remote %s es %s evi %u vtep %s", + p->prefix.ead_addr.eth_tag ? + "ead-es" : "ead-evi", + esi_to_str(esi, buf, + sizeof(buf)), + vpn->vni, + inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4)); + + es = bgp_evpn_es_find(esi); + if (!es) { + es = bgp_evpn_es_new(bgp, esi); + if (!es) { + flog_err(EC_BGP_ES_CREATE, + "%u: Failed to allocate ES entry for ESI %s - at remote ES Add", + bgp->vrf_id, esi_to_str(esi, buf, sizeof(buf))); + return -1; + } + } + + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) { + es_evi = bgp_evpn_es_evi_new(es, vpn); + if (!es_evi) { + bgp_evpn_es_free(es); + return -1; + } + } + + ead_es = !!p->prefix.ead_addr.eth_tag; + bgp_evpn_es_evi_vtep_add(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4, + ead_es); + + bgp_evpn_es_evi_remote_info_re_eval(es_evi); + return 0; +} + +/* A remote VTEP has withdrawn. The es-evi-vtep will be deleted and the + * parent es-evi freed up implicitly in last VTEP's deref. + */ +int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p) +{ + char buf[ESI_STR_LEN]; + struct bgp_evpn_es *es; + struct bgp_evpn_es_evi *es_evi; + bool ead_es; + + if (!vpn) + /* local EAD-ES need not be sent back to zebra */ + return 0; + + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("del remote %s es %s evi %u vtep %s", + p->prefix.ead_addr.eth_tag ? 
+ "ead-es" : "ead-evi", + esi_to_str(&p->prefix.ead_addr.esi, buf, + sizeof(buf)), + vpn->vni, + inet_ntoa(p->prefix.ead_addr.ip.ipaddr_v4)); + + es = bgp_evpn_es_find(&p->prefix.ead_addr.esi); + if (!es) + /* XXX - error logs */ + return 0; + es_evi = bgp_evpn_es_evi_find(es, vpn); + if (!es_evi) + /* XXX - error logs */ + return 0; + + ead_es = !!p->prefix.ead_addr.eth_tag; + bgp_evpn_es_evi_vtep_del(bgp, es_evi, p->prefix.ead_addr.ip.ipaddr_v4, + ead_es); + bgp_evpn_es_evi_remote_info_re_eval(es_evi); + return 0; +} + +/* Initialize the ES tables maintained per-L2_VNI */ +void bgp_evpn_vni_es_init(struct bgpevpn *vpn) +{ + /* Initialize the ES-EVI RB tree */ + RB_INIT(bgp_es_evi_rb_head, &vpn->es_evi_rb_tree); + + /* Initialize the local list maintained for quick walks by type */ + vpn->local_es_evi_list = list_new(); + listset_app_node_mem(vpn->local_es_evi_list); +} + +/* Cleanup the ES info maintained per-L2_VNI */ +void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn) +{ + struct bgp_evpn_es_evi *es_evi; + struct bgp_evpn_es_evi *es_evi_next; + + RB_FOREACH_SAFE(es_evi, bgp_es_evi_rb_head, + &vpn->es_evi_rb_tree, es_evi_next) { + bgp_evpn_local_es_evi_do_del(es_evi); + } + + list_delete(&vpn->local_es_evi_list); +} + +static char *bgp_evpn_es_evi_vteps_str(char *vtep_str, + struct bgp_evpn_es_evi *es_evi) +{ + char vtep_flag_str[BGP_EVPN_FLAG_STR_SZ]; + struct listnode *node; + struct bgp_evpn_es_evi_vtep *evi_vtep; + bool first = true; + + vtep_str[0] = '\0'; + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) { + vtep_flag_str[0] = '\0'; + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES) + strcpy(vtep_flag_str + strlen(vtep_flag_str), "E"); + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI) + strcpy(vtep_flag_str + strlen(vtep_flag_str), "V"); + + if (!strlen(vtep_flag_str)) + strcpy(vtep_flag_str, "-"); + if (first) { + first = false; + sprintf(vtep_str + strlen(vtep_str), "%s(%s)", + inet_ntoa(evi_vtep->vtep_ip), + vtep_flag_str); + 
} else { + sprintf(vtep_str + strlen(vtep_str), ",%s(%s)", + inet_ntoa(evi_vtep->vtep_ip), + vtep_flag_str); + } + } + + return vtep_str; +} + +/* Append one JSON object describing evi_vtep to the json_vteps array: + * its "vtep_ip" and, when either EAD flag is set, a "flags" array + * listing which EAD route types were received from it. + */ +static void bgp_evpn_es_evi_json_vtep_fill(json_object *json_vteps, + struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + json_object *json_vtep_entry; + json_object *json_flags; + + json_vtep_entry = json_object_new_object(); + + json_object_string_add(json_vtep_entry, + "vtep_ip", + inet_ntoa(evi_vtep->vtep_ip)); + if (evi_vtep->flags & (BGP_EVPN_EVI_VTEP_EAD_PER_ES | + BGP_EVPN_EVI_VTEP_EAD_PER_EVI)) { + json_flags = json_object_new_array(); + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_ES) + json_array_string_add(json_flags, "ead-per-es"); + if (evi_vtep->flags & BGP_EVPN_EVI_VTEP_EAD_PER_EVI) + json_array_string_add(json_flags, "ead-per-evi"); + json_object_object_add(json_vtep_entry, + "flags", json_flags); + } + + json_object_array_add(json_vteps, + json_vtep_entry); +} + +static void bgp_evpn_es_evi_show_entry(struct vty *vty, + struct bgp_evpn_es_evi *es_evi, json_object *json) +{ + struct listnode *node; + struct bgp_evpn_es_evi_vtep *evi_vtep; + + if (json) { + json_object *json_vteps; + json_object *json_types; + + json_object_string_add(json, "esi", es_evi->es->esi_str); + json_object_int_add(json, "vni", es_evi->vpn->vni); + + if (es_evi->flags & (BGP_EVPNES_EVI_LOCAL | + BGP_EVPNES_EVI_REMOTE)) { + json_types = json_object_new_array(); + if (es_evi->flags & BGP_EVPNES_EVI_LOCAL) + json_array_string_add(json_types, "local"); + if (es_evi->flags & BGP_EVPNES_EVI_REMOTE) + json_array_string_add(json_types, "remote"); + json_object_object_add(json, "type", json_types); + } + + if (listcount(es_evi->es_evi_vtep_list)) { + json_vteps = json_object_new_array(); + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, + node, evi_vtep)) { + bgp_evpn_es_evi_json_vtep_fill(json_vteps, + evi_vtep); + } + json_object_object_add(json, "vteps", json_vteps); + } + } else { + char type_str[4]; + char vtep_str[ES_VTEP_LIST_STR_SZ + 
BGP_EVPN_VTEPS_FLAG_STR_SZ]; + + type_str[0] = '\0'; + if (es_evi->flags & BGP_EVPNES_EVI_LOCAL) + strcpy(type_str + strlen(type_str), "L"); + if (es_evi->flags & BGP_EVPNES_EVI_REMOTE) + strcpy(type_str + strlen(type_str), "R"); + if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) + strcpy(type_str + strlen(type_str), "I"); + + bgp_evpn_es_evi_vteps_str(vtep_str, es_evi); + + vty_out(vty, "%-8d %-30s %-5s %s\n", + es_evi->vpn->vni, es_evi->es->esi_str, + type_str, vtep_str); + } +} + +static void bgp_evpn_es_evi_show_entry_detail(struct vty *vty, + struct bgp_evpn_es_evi *es_evi, json_object *json) +{ + if (json) { + json_object *json_flags; + + /* Add the "brief" info first */ + bgp_evpn_es_evi_show_entry(vty, es_evi, json); + if (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) { + json_flags = json_object_new_array(); + json_array_string_add(json_flags, "es-vtep-mismatch"); + json_object_object_add(json, "flags", json_flags); + } + } else { + char vtep_str[ES_VTEP_LIST_STR_SZ + BGP_EVPN_VTEPS_FLAG_STR_SZ]; + char type_str[4]; + + type_str[0] = '\0'; + if (es_evi->flags & BGP_EVPNES_EVI_LOCAL) + strcpy(type_str + strlen(type_str), "L"); + if (es_evi->flags & BGP_EVPNES_EVI_REMOTE) + strcpy(type_str + strlen(type_str), "R"); + + bgp_evpn_es_evi_vteps_str(vtep_str, es_evi); + if (!strlen(vtep_str)) + strcpy(vtep_str, "-"); + + vty_out(vty, "VNI: %d ESI: %s\n", + es_evi->vpn->vni, es_evi->es->esi_str); + vty_out(vty, " Type: %s\n", type_str); + vty_out(vty, " Inconsistencies: %s\n", + (es_evi->flags & BGP_EVPNES_EVI_INCONS_VTEP_LIST) ? 
+ "es-vtep-mismatch":"-"); + vty_out(vty, " VTEPs: %s\n", vtep_str); + vty_out(vty, "\n"); + } +} + +static void bgp_evpn_es_evi_show_one_vni(struct bgpevpn *vpn, struct vty *vty, + json_object *json_array, bool detail) +{ + struct bgp_evpn_es_evi *es_evi; + json_object *json = NULL; + + RB_FOREACH(es_evi, bgp_es_evi_rb_head, &vpn->es_evi_rb_tree) { + if (json_array) + /* create a separate json object for each ES */ + json = json_object_new_object(); + if (detail) + bgp_evpn_es_evi_show_entry_detail(vty, es_evi, json); + else + bgp_evpn_es_evi_show_entry(vty, es_evi, json); + /* add ES to the json array */ + if (json_array) + json_object_array_add(json_array, json); + } +} + +struct es_evi_show_ctx { + struct vty *vty; + json_object *json; + int detail; +}; + +static void bgp_evpn_es_evi_show_one_vni_hash_cb(struct hash_bucket *bucket, + void *ctxt) +{ + struct bgpevpn *vpn = (struct bgpevpn *)bucket->data; + struct es_evi_show_ctx *wctx = (struct es_evi_show_ctx *)ctxt; + + bgp_evpn_es_evi_show_one_vni(vpn, wctx->vty, wctx->json, wctx->detail); +} + +/* Display all ES EVIs */ +void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail) +{ + json_object *json_array = NULL; + struct es_evi_show_ctx wctx; + struct bgp *bgp; + + if (uj) { + /* create an array of ES-EVIs */ + json_array = json_object_new_array(); + } + + wctx.vty = vty; + wctx.json = json_array; + wctx.detail = detail; + + bgp = bgp_get_evpn(); + + if (!json_array && !detail) { + vty_out(vty, "Flags: L local, R remote, I inconsistent\n"); + vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n"); + vty_out(vty, "%-8s %-30s %-5s %s\n", + "VNI", "ESI", "Flags", "VTEPs"); + } + + if (bgp) + hash_iterate(bgp->vnihash, + (void (*)(struct hash_bucket *, + void *))bgp_evpn_es_evi_show_one_vni_hash_cb, + &wctx); + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + +/* Display specific ES EVI */ +void 
bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni, + bool uj, bool detail) +{ + struct bgpevpn *vpn = NULL; + json_object *json_array = NULL; + struct bgp *bgp; + + if (uj) { + /* create an array of ES-EVIs */ + json_array = json_object_new_array(); + } + + bgp = bgp_get_evpn(); + if (bgp) + vpn = bgp_evpn_lookup_vni(bgp, vni); + + if (vpn) { + if (!json_array && !detail) { + vty_out(vty, "Flags: L local, R remote, I inconsistent\n"); + vty_out(vty, "VTEP-Flags: E EAD-per-ES, V EAD-per-EVI\n"); + vty_out(vty, "%-8s %-30s %-5s %s\n", + "VNI", "ESI", "Flags", "VTEPs"); + } + + bgp_evpn_es_evi_show_one_vni(vpn, vty, json_array, detail); + } else { + if (!uj) + vty_out(vty, "VNI not found\n"); + } + + if (uj) { + vty_out(vty, "%s\n", json_object_to_json_string_ext( + json_array, JSON_C_TO_STRING_PRETTY)); + json_object_free(json_array); + } +} + +/***************************************************************************** + * Ethernet Segment Consistency checks + * Consistency checking is done to detect misconfig or mis-cabling. When + * an inconsistency is detected it is simply logged (and displayed via + * show commands) at this point. A more drastic action can be executed (based + * on user config) in the future. 
+ */ +/* queue up the es for background consistency checks */ +static void bgp_evpn_es_cons_checks_pend_add(struct bgp_evpn_es *es) +{ + if (!bgp_mh_info->consistency_checking) + /* consistency checking is not enabled */ + return; + + if (CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND)) + /* already queued for consistency checking */ + return; + + SET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND); + listnode_init(&es->pend_es_listnode, es); + listnode_add_after(bgp_mh_info->pend_es_list, + listtail_unchecked(bgp_mh_info->pend_es_list), + &es->pend_es_listnode); +} + +/* pull the ES from the consistency check list */ +static void bgp_evpn_es_cons_checks_pend_del(struct bgp_evpn_es *es) +{ + if (!CHECK_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND)) + return; + + UNSET_FLAG(es->flags, BGP_EVPNES_CONS_CHECK_PEND); + list_delete_node(bgp_mh_info->pend_es_list, + &es->pend_es_listnode); +} + +/* Number of active VTEPs associated with the ES-per-EVI */ +static uint32_t bgp_evpn_es_evi_get_active_vtep_cnt( + struct bgp_evpn_es_evi *es_evi) +{ + struct bgp_evpn_es_evi_vtep *evi_vtep; + struct listnode *node; + uint32_t vtep_cnt = 0; + + for (ALL_LIST_ELEMENTS_RO(es_evi->es_evi_vtep_list, node, evi_vtep)) { + if (CHECK_FLAG(evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE)) + ++vtep_cnt; + } + + return vtep_cnt; +} + +/* Number of active VTEPs associated with the ES */ +static uint32_t bgp_evpn_es_get_active_vtep_cnt(struct bgp_evpn_es *es) +{ + struct listnode *node; + uint32_t vtep_cnt = 0; + struct bgp_evpn_es_vtep *es_vtep; + + for (ALL_LIST_ELEMENTS_RO(es->es_vtep_list, node, es_vtep)) { + if (CHECK_FLAG(es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE)) + ++vtep_cnt; + } + + return vtep_cnt; +} + +static struct bgp_evpn_es_vtep *bgp_evpn_es_get_next_active_vtep( + struct bgp_evpn_es *es, struct bgp_evpn_es_vtep *es_vtep) +{ + struct listnode *node; + struct bgp_evpn_es_vtep *next_es_vtep; + + if (es_vtep) + node = listnextnode_unchecked(&es_vtep->es_listnode); + else + node = 
listhead(es->es_vtep_list); + + for (; node; node = listnextnode_unchecked(node)) { + next_es_vtep = listgetdata(node); + if (CHECK_FLAG(next_es_vtep->flags, BGP_EVPNES_VTEP_ACTIVE)) + return next_es_vtep; + } + + return NULL; +} + +static struct bgp_evpn_es_evi_vtep *bgp_evpn_es_evi_get_next_active_vtep( + struct bgp_evpn_es_evi *es_evi, + struct bgp_evpn_es_evi_vtep *evi_vtep) +{ + struct listnode *node; + struct bgp_evpn_es_evi_vtep *next_evi_vtep; + + if (evi_vtep) + node = listnextnode_unchecked(&evi_vtep->es_evi_listnode); + else + node = listhead(es_evi->es_evi_vtep_list); + + for (; node; node = listnextnode_unchecked(node)) { + next_evi_vtep = listgetdata(node); + if (CHECK_FLAG(next_evi_vtep->flags, BGP_EVPN_EVI_VTEP_ACTIVE)) + return next_evi_vtep; + } + + return NULL; +} + +static void bgp_evpn_es_evi_set_inconsistent(struct bgp_evpn_es_evi *es_evi) +{ + if (!CHECK_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST)) { + if (BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("inconsistency detected - es %s evi %u vtep list mismatch", + es_evi->es->esi_str, + es_evi->vpn->vni); + SET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST); + + /* update parent ES with the incosistency setting */ + if (!es_evi->es->incons_evi_vtep_cnt && + BGP_DEBUG(evpn_mh, EVPN_MH_ES)) + zlog_debug("inconsistency detected - es %s vtep list mismatch", + es_evi->es->esi_str); + ++es_evi->es->incons_evi_vtep_cnt; + SET_FLAG(es_evi->es->inconsistencies, + BGP_EVPNES_INCONS_VTEP_LIST); + } +} + +static uint32_t bgp_evpn_es_run_consistency_checks(struct bgp_evpn_es *es) +{ + int proc_cnt = 0; + int es_active_vtep_cnt; + int evi_active_vtep_cnt; + struct bgp_evpn_es_evi *es_evi; + struct listnode *evi_node; + struct bgp_evpn_es_vtep *es_vtep; + struct bgp_evpn_es_evi_vtep *evi_vtep; + + /* reset the inconsistencies and re-evaluate */ + es->incons_evi_vtep_cnt = 0; + es->inconsistencies = 0; + + es_active_vtep_cnt = bgp_evpn_es_get_active_vtep_cnt(es); + for 
(ALL_LIST_ELEMENTS_RO(es->es_evi_list, + evi_node, es_evi)) { + ++proc_cnt; + + /* reset the inconsistencies on the EVI and re-evaluate*/ + UNSET_FLAG(es_evi->flags, BGP_EVPNES_EVI_INCONS_VTEP_LIST); + + evi_active_vtep_cnt = + bgp_evpn_es_evi_get_active_vtep_cnt(es_evi); + if (es_active_vtep_cnt != evi_active_vtep_cnt) { + bgp_evpn_es_evi_set_inconsistent(es_evi); + continue; + } + + if (!es_active_vtep_cnt) + continue; + + es_vtep = NULL; + evi_vtep = NULL; + while ((es_vtep = bgp_evpn_es_get_next_active_vtep( + es, es_vtep))) { + evi_vtep = bgp_evpn_es_evi_get_next_active_vtep(es_evi, + evi_vtep); + if (!evi_vtep) { + bgp_evpn_es_evi_set_inconsistent(es_evi); + break; + } + if (es_vtep->vtep_ip.s_addr != + evi_vtep->vtep_ip.s_addr) { + /* inconsistency detected; set it and move + * to the next evi + */ + bgp_evpn_es_evi_set_inconsistent(es_evi); + break; + } + } + } + + return proc_cnt; +} + +static int bgp_evpn_run_consistency_checks(struct thread *t) +{ + int proc_cnt = 0; + int es_cnt = 0; + struct listnode *node; + struct listnode *nextnode; + struct bgp_evpn_es *es; + + for (ALL_LIST_ELEMENTS(bgp_mh_info->pend_es_list, + node, nextnode, es)) { + ++es_cnt; + ++proc_cnt; + /* run consistency checks on the ES and remove it from the + * pending list + */ + proc_cnt += bgp_evpn_es_run_consistency_checks(es); + bgp_evpn_es_cons_checks_pend_del(es); + if (proc_cnt > 500) + break; + } + + /* restart the timer */ + thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, NULL, + BGP_EVPN_CONS_CHECK_INTERVAL, + &bgp_mh_info->t_cons_check); + + return 0; +} + +/*****************************************************************************/ +void bgp_evpn_mh_init(void) +{ + bm->mh_info = XCALLOC(MTYPE_BGP_EVPN_MH_INFO, sizeof(*bm->mh_info)); + + /* setup ES tables */ + RB_INIT(bgp_es_rb_head, &bgp_mh_info->es_rb_tree); + /* local ES list */ + bgp_mh_info->local_es_list = list_new(); + listset_app_node_mem(bgp_mh_info->local_es_list); + /* list of ESs with pending 
processing */ + bgp_mh_info->pend_es_list = list_new(); + listset_app_node_mem(bgp_mh_info->pend_es_list); + + /* config knobs - XXX add cli to control it */ + bgp_mh_info->ead_evi_adv_for_down_links = true; + bgp_mh_info->consistency_checking = true; + + if (bgp_mh_info->consistency_checking) + thread_add_timer(bm->master, bgp_evpn_run_consistency_checks, + NULL, BGP_EVPN_CONS_CHECK_INTERVAL, + &bgp_mh_info->t_cons_check); + + memset(&zero_esi_buf, 0, sizeof(esi_t)); +} + +/* Tear down the multihoming state set up by bgp_evpn_mh_init: run the + * local-ES delete on every ES in the global tree, stop the consistency + * check timer and release the ES lists and the mh_info itself. + */ +void bgp_evpn_mh_finish(void) +{ + struct bgp_evpn_es *es; + struct bgp_evpn_es *es_next; + struct bgp *bgp; + + bgp = bgp_get_evpn(); + if (bgp) { + RB_FOREACH_SAFE(es, bgp_es_rb_head, + &bgp_mh_info->es_rb_tree, es_next) { + /* XXX - need to force free remote ESs here */ + bgp_evpn_local_es_do_del(bgp, es); + } + } + /* the timer is only armed when consistency checking is enabled, + * so there may be nothing to cancel + */ + if (bgp_mh_info->t_cons_check) + thread_cancel(bgp_mh_info->t_cons_check); + list_delete(&bgp_mh_info->local_es_list); + list_delete(&bgp_mh_info->pend_es_list); + + XFREE(MTYPE_BGP_EVPN_MH_INFO, bgp_mh_info); +} diff --git a/bgpd/bgp_evpn_mh.h b/bgpd/bgp_evpn_mh.h index 868fe749f..1dcdfc3cf 100644 --- a/bgpd/bgp_evpn_mh.h +++ b/bgpd/bgp_evpn_mh.h @@ -1,6 +1,7 @@ /* EVPN header for multihoming procedures * * Copyright (C) 2019 Cumulus Networks + * Anuradha Karuppiah * * This file is part of FRRouting. 
* @@ -24,17 +25,236 @@ #include "bgp_evpn.h" #include "bgp_evpn_private.h" -extern unsigned int esi_hash_keymake(const void *p); -extern bool esi_cmp(const void *p1, const void *p2); -extern int install_uninstall_route_in_es(struct bgp *bgp, struct evpnes *es, - afi_t afi, safi_t safi, - struct prefix_evpn *evp, - struct bgp_path_info *pi, int install); -int process_type4_route(struct peer *peer, afi_t afi, safi_t safi, - struct attr *attr, uint8_t *pfx, int psize, - uint32_t addpath_id); +#define BGP_EVPN_AD_ES_ETH_TAG 0xffffffff +#define BGP_EVPN_AD_EVI_ETH_TAG 0 + +#define BGP_EVPNES_INCONS_STR_SZ 80 +#define BGP_EVPN_FLAG_STR_SZ 5 +#define BGP_EVPN_VTEPS_FLAG_STR_SZ (BGP_EVPN_FLAG_STR_SZ * ES_VTEP_MAX_CNT) + +#define BGP_EVPN_CONS_CHECK_INTERVAL 60 + + +/* Ethernet Segment entry - + * - Local and remote ESs are maintained in a global RB tree, + * bgp_mh_info->es_rb_tree using ESI as key + * - Local ESs are received from zebra (BGP_EVPNES_LOCAL) + * - Remotes ESs are implicitly created (by reference) by a remote ES-EVI + * (BGP_EVPNES_REMOTE) + * - An ES can be simulatenously LOCAL and REMOTE; infact all LOCAL ESs are + * expected to have REMOTE ES peers. 
+ */ +struct bgp_evpn_es { + /* Ethernet Segment Identifier */ + esi_t esi; + char esi_str[ESI_STR_LEN]; + + /* es flags */ + uint32_t flags; + /* created via zebra config */ +#define BGP_EVPNES_LOCAL (1 << 0) + /* created implicitly by a remote ES-EVI reference */ +#define BGP_EVPNES_REMOTE (1 << 1) + /* local ES link is oper-up */ +#define BGP_EVPNES_OPER_UP (1 << 2) + /* enable generation of EAD-EVI routes */ +#define BGP_EVPNES_ADV_EVI (1 << 3) + /* consistency checks pending */ +#define BGP_EVPNES_CONS_CHECK_PEND (1 << 4) + + /* memory used for adding the es to bgp->es_rb_tree */ + RB_ENTRY(bgp_evpn_es) rb_node; + + /* [EVPNES_LOCAL] memory used for linking the es to + * bgp_mh_info->local_es_list + */ + struct listnode es_listnode; + + /* memory used for linking the es to "processing" pending list + * bgp_mh_info->pend_es_list + */ + struct listnode pend_es_listnode; + + /* [EVPNES_LOCAL] Id for deriving the RD automatically for this ESI */ + uint16_t rd_id; + + /* [EVPNES_LOCAL] RD for this ES */ + struct prefix_rd prd; + + /* [EVPNES_LOCAL] originator ip address */ + struct in_addr originator_ip; + + /* [EVPNES_LOCAL] Route table for EVPN routes for this ESI- + * - Type-4 local and remote routes + * - Type-1 local routes + */ + struct bgp_table *route_table; + + /* list of PEs (bgp_evpn_es_vtep) attached to the ES */ + struct list *es_vtep_list; + + /* List of ES-EVIs associated with this ES */ + struct list *es_evi_list; + + /* Number of remote VNIs referencing this ES */ + uint32_t remote_es_evi_cnt; + + uint32_t inconsistencies; + /* there are one or more EVIs whose VTEP list doesn't match + * with the ES's VTEP list + */ +#define BGP_EVPNES_INCONS_VTEP_LIST (1 << 0) + + /* number of es-evi entries whose VTEP list doesn't match + * with the ES's + */ + uint32_t incons_evi_vtep_cnt; + + QOBJ_FIELDS +}; +DECLARE_QOBJ_TYPE(bgp_evpn_es) +RB_HEAD(bgp_es_rb_head, bgp_evpn_es); +RB_PROTOTYPE(bgp_es_rb_head, bgp_evpn_es, rb_node, bgp_es_rb_cmp); + +/* PE 
attached to an ES */ +struct bgp_evpn_es_vtep { + struct bgp_evpn_es *es; /* parent ES */ + struct in_addr vtep_ip; + + uint32_t flags; + /* Rxed a Type4 route from this PE */ +#define BGP_EVPNES_VTEP_ESR (1 << 0) + /* Active (rxed EAD-ES and EAD-EVI) and can be included as + * a nexthop + */ +#define BGP_EVPNES_VTEP_ACTIVE (1 << 1) + + uint32_t evi_cnt; /* es_evis referencing this vtep as an active path */ + + /* memory used for adding the entry to es->es_vtep_list */ + struct listnode es_listnode; +}; + +/* ES per-EVI info + * - ES-EVIs are maintained per-L2-VNI (vpn->es_evi_rb_tree) + * - ES-EVIs are also linked to the parent ES (es->es_evi_list) + * - Local ES-EVIs are created by zebra (via config). They are linked to a + * per-VNI list (vpn->local_es_evi_list) for quick access + * - Remote ES-EVIs are created implicitly when a bgp_evpn_es_evi_vtep + * references it. + */ +struct bgp_evpn_es_evi { + struct bgp_evpn_es *es; + struct bgpevpn *vpn; + + /* ES-EVI flags */ + uint32_t flags; +/* local ES-EVI, created by zebra */ +#define BGP_EVPNES_EVI_LOCAL (1 << 0) +/* created via a remote VTEP imported by BGP */ +#define BGP_EVPNES_EVI_REMOTE (1 << 1) +#define BGP_EVPNES_EVI_INCONS_VTEP_LIST (1 << 2) + + /* memory used for adding the es_evi to es_evi->vpn->es_evi_rb_tree */ + RB_ENTRY(bgp_evpn_es_evi) rb_node; + /* memory used for linking the es_evi to + * es_evi->vpn->local_es_evi_list + */ + struct listnode l2vni_listnode; + /* memory used for linking the es_evi to + * es_evi->es->es_evi_list + */ + struct listnode es_listnode; + + /* list of PEs (bgp_evpn_es_evi_vtep) attached to the ES for this VNI */ + struct list *es_evi_vtep_list; +}; + +/* PE attached to an ES for a VNI. This entry is created when an EAD-per-ES + * or EAD-per-EVI Type1 route is imported into the VNI. 
+ */ +struct bgp_evpn_es_evi_vtep { + struct bgp_evpn_es_evi *es_evi; /* parent ES-EVI */ + struct in_addr vtep_ip; + + uint32_t flags; + /* Rxed an EAD-per-ES route from the PE */ +#define BGP_EVPN_EVI_VTEP_EAD_PER_ES (1 << 0) /* rxed EAD-per-ES */ + /* Rxed an EAD-per-EVI route from the PE */ +#define BGP_EVPN_EVI_VTEP_EAD_PER_EVI (1 << 1) /* rxed EAD-per-EVI */ + /* VTEP is active i.e. will result in the creation of an es-vtep */ +#define BGP_EVPN_EVI_VTEP_ACTIVE (1 << 2) +#define BGP_EVPN_EVI_VTEP_EAD (BGP_EVPN_EVI_VTEP_EAD_PER_ES |\ + BGP_EVPN_EVI_VTEP_EAD_PER_EVI) + + /* memory used for adding the entry to es_evi->es_evi_vtep_list */ + struct listnode es_evi_listnode; + struct bgp_evpn_es_vtep *es_vtep; +}; + +/* multihoming information stored in bgp_master */ +#define bgp_mh_info (bm->mh_info) +struct bgp_evpn_mh_info { + /* RB tree of Ethernet segments (used for EVPN-MH) */ + struct bgp_es_rb_head es_rb_tree; + /* List of local ESs */ + struct list *local_es_list; + /* List of ESs with pending/periodic processing */ + struct list *pend_es_list; + /* periodic timer for running background consistency checks */ + struct thread *t_cons_check; + + /* config knobs for optimizing or interop */ + /* Generate EAD-EVI routes even if the ES is oper-down. This can be + * enabled as an optimization to avoid a storm of updates when an ES + * link flaps. + */ + bool ead_evi_adv_for_down_links; + /* Enable ES consistency checking */ + bool consistency_checking; +}; + +/****************************************************************************/ +static inline int is_es_local(struct bgp_evpn_es *es) +{ + return CHECK_FLAG(es->flags, BGP_EVPNES_LOCAL) ? 1 : 0; +} + +extern esi_t *zero_esi; +static inline esi_t *bgp_evpn_attr_get_esi(struct attr *attr) +{ + return (attr) ? 
&attr->esi : zero_esi; +} + +/****************************************************************************/ +extern int bgp_evpn_es_route_install_uninstall(struct bgp *bgp, + struct bgp_evpn_es *es, afi_t afi, safi_t safi, + struct prefix_evpn *evp, struct bgp_path_info *pi, + int install); +int bgp_evpn_type1_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id); +int bgp_evpn_type4_route_process(struct peer *peer, afi_t afi, safi_t safi, + struct attr *attr, uint8_t *pfx, int psize, + uint32_t addpath_id); extern int bgp_evpn_local_es_add(struct bgp *bgp, esi_t *esi, - struct ipaddr *originator_ip); -extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi, - struct ipaddr *originator_ip); + struct in_addr originator_ip, bool oper_up); +extern int bgp_evpn_local_es_del(struct bgp *bgp, esi_t *esi); +extern int bgp_evpn_local_es_evi_add(struct bgp *bgp, esi_t *esi, vni_t vni); +extern int bgp_evpn_local_es_evi_del(struct bgp *bgp, esi_t *esi, vni_t vni); +extern int bgp_evpn_remote_es_evi_add(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p); +extern int bgp_evpn_remote_es_evi_del(struct bgp *bgp, struct bgpevpn *vpn, + const struct prefix_evpn *p); +extern void bgp_evpn_mh_init(void); +extern void bgp_evpn_mh_finish(void); +void bgp_evpn_vni_es_init(struct bgpevpn *vpn); +void bgp_evpn_vni_es_cleanup(struct bgpevpn *vpn); +void bgp_evpn_es_show_esi(struct vty *vty, esi_t *esi, bool uj); +void bgp_evpn_es_show(struct vty *vty, bool uj, bool detail); +void bgp_evpn_es_evi_show_vni(struct vty *vty, vni_t vni, + bool uj, bool detail); +void bgp_evpn_es_evi_show(struct vty *vty, bool uj, bool detail); +struct bgp_evpn_es *bgp_evpn_es_find(const esi_t *esi); + #endif /* _FRR_BGP_EVPN_MH_H */ diff --git a/bgpd/bgp_evpn_private.h b/bgpd/bgp_evpn_private.h index 839785d3a..4ac626e4c 100644 --- a/bgpd/bgp_evpn_private.h +++ b/bgpd/bgp_evpn_private.h @@ -34,6 +34,20 @@ * in bits 
*/ #define EVPN_ROUTE_PREFIXLEN (sizeof(struct evpn_addr) * 8) +/* EVPN route RD buffer length */ +#define BGP_EVPN_PREFIX_RD_LEN 100 + +/* packet sizes for EVPN routes */ +/* Type-1 route should be 25 bytes + * RD (8), ESI (10), eth-tag (4), vni (3) + */ +#define BGP_EVPN_TYPE1_PSIZE 25 +/* Type-4 route should be either 23 or 35 bytes + * RD (8), ESI (10), ip-len (1), ip (4 or 16) + */ +#define BGP_EVPN_TYPE4_V4_PSIZE 23 +#define BGP_EVPN_TYPE4_V6_PSIZE 35 + /* EVPN route types. */ typedef enum { BGP_EVPN_AD_ROUTE = 1, /* Ethernet Auto-Discovery (A-D) route */ @@ -43,6 +57,9 @@ typedef enum { BGP_EVPN_IP_PREFIX_ROUTE, /* IP Prefix route */ } bgp_evpn_route_type; +RB_HEAD(bgp_es_evi_rb_head, bgp_evpn_es_evi); +RB_PROTOTYPE(bgp_es_evi_rb_head, bgp_evpn_es_evi, rb_node, + bgp_es_evi_rb_cmp); /* * Hash table of EVIs. Right now, the only type of EVI supported is with * VxLAN encapsulation, hence each EVI corresponds to a L2 VNI. @@ -98,46 +115,16 @@ struct bgpevpn { * this VNI. */ struct bgp_table *route_table; - QOBJ_FIELDS -}; - -DECLARE_QOBJ_TYPE(bgpevpn) - -struct evpnes { - - /* Ethernet Segment Identifier */ - esi_t esi; - - /* es flags */ - uint16_t flags; -#define EVPNES_LOCAL 0x01 -#define EVPNES_REMOTE 0x02 - - /* - * Id for deriving the RD - * automatically for this ESI - */ - uint16_t rd_id; - - /* RD for this VNI. */ - struct prefix_rd prd; + /* RB tree of ES-EVIs */ + struct bgp_es_evi_rb_head es_evi_rb_tree; - /* originator ip address */ - struct ipaddr originator_ip; - - /* list of VTEPs in the same site */ - struct list *vtep_list; - - /* - * Route table for EVPN routes for - * this ESI. - type4 routes - */ - struct bgp_table *route_table; + /* List of local ES-EVIs */ + struct list *local_es_evi_list; QOBJ_FIELDS }; -DECLARE_QOBJ_TYPE(evpnes) +DECLARE_QOBJ_TYPE(bgpevpn) /* Mapping of Import RT to VNIs. 
* The Import RTs of all VNIs are maintained in a hash table with each @@ -330,6 +317,16 @@ static inline void encode_es_rt_extcomm(struct ecommunity_val *eval, memcpy(&eval->val[2], mac, ETH_ALEN); } +static inline void encode_esi_label_extcomm(struct ecommunity_val *eval, + bool single_active) +{ + memset(eval, 0, sizeof(struct ecommunity_val)); + eval->val[0] = ECOMMUNITY_ENCODE_EVPN; + eval->val[1] = ECOMMUNITY_EVPN_SUBTYPE_ESI_LABEL; + if (single_active) + eval->val[2] |= (1 << 0); +} + static inline void encode_rmac_extcomm(struct ecommunity_val *eval, struct ethaddr *rmac) { @@ -487,6 +484,47 @@ static inline void build_evpn_type4_prefix(struct prefix_evpn *p, memcpy(&p->prefix.es_addr.esi, esi, sizeof(esi_t)); } +static inline void build_evpn_type1_prefix(struct prefix_evpn *p, + uint32_t eth_tag, + esi_t *esi, + struct in_addr originator_ip) +{ + memset(p, 0, sizeof(struct prefix_evpn)); + p->family = AF_EVPN; + p->prefixlen = EVPN_ROUTE_PREFIXLEN; + p->prefix.route_type = BGP_EVPN_AD_ROUTE; + p->prefix.ead_addr.eth_tag = eth_tag; + p->prefix.ead_addr.ip_prefix_length = IPV4_MAX_BITLEN; + p->prefix.ead_addr.ip.ipa_type = IPADDR_V4; + p->prefix.ead_addr.ip.ipaddr_v4 = originator_ip; + memcpy(&p->prefix.ead_addr.esi, esi, sizeof(esi_t)); +} + +static inline void evpn_type1_prefix_global_copy(struct prefix_evpn *global_p, + const struct prefix_evpn *vni_p) +{ + memcpy(global_p, vni_p, sizeof(*global_p)); + global_p->prefix.ead_addr.ip_prefix_length = 0; + global_p->prefix.ead_addr.ip.ipa_type = 0; + global_p->prefix.ead_addr.ip.ipaddr_v4.s_addr = 0; +} + +/* EAD prefix in the global table doesn't include the VTEP-IP so + * we need to create a different copy for the VNI + */ +static inline struct prefix_evpn *evpn_type1_prefix_vni_copy( + struct prefix_evpn *vni_p, + const struct prefix_evpn *global_p, + struct in_addr originator_ip) +{ + memcpy(vni_p, global_p, sizeof(*vni_p)); + vni_p->prefix.ead_addr.ip_prefix_length = IPV4_MAX_BITLEN; + 
vni_p->prefix.ead_addr.ip.ipa_type = IPADDR_V4; + vni_p->prefix.ead_addr.ip.ipaddr_v4 = originator_ip; + + return vni_p; +} + static inline int evpn_default_originate_set(struct bgp *bgp, afi_t afi, safi_t safi) { @@ -511,11 +549,6 @@ static inline void es_get_system_mac(esi_t *esi, memcpy(mac, &esi->val[1], ETH_ALEN); } -static inline int is_es_local(struct evpnes *es) -{ - return CHECK_FLAG(es->flags, EVPNES_LOCAL) ? 1 : 0; -} - static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn) { struct bgp *bgp_evpn = NULL; @@ -526,6 +559,8 @@ static inline bool bgp_evpn_is_svi_macip_enabled(struct bgpevpn *vpn) vpn->advertise_svi_macip); } +extern struct zclient *zclient; + extern void bgp_evpn_install_uninstall_default_route(struct bgp *bgp_vrf, afi_t afi, safi_t safi, bool add); @@ -563,10 +598,18 @@ extern struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, vrf_id_t tenant_vrf_id, struct in_addr mcast_grp); extern void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn); -extern struct evpnes *bgp_evpn_lookup_es(struct bgp *bgp, esi_t *esi); extern bool bgp_evpn_lookup_l3vni_l2vni_table(vni_t vni); extern int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn); extern void delete_evpn_route_entry(struct bgp *bgp, afi_t afi, safi_t safi, struct bgp_dest *dest, struct bgp_path_info **pi); +int vni_list_cmp(void *p1, void *p2); +extern int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn, + struct bgp_node *rn); +extern struct bgp_node *bgp_global_evpn_node_get( + struct bgp_table *table, afi_t afi, safi_t safi, + const struct prefix_evpn *evp, struct prefix_rd *prd); +extern struct bgp_node *bgp_global_evpn_node_lookup( + struct bgp_table *table, afi_t afi, safi_t safi, + const struct prefix_evpn *evp, struct prefix_rd *prd); #endif /* _BGP_EVPN_PRIVATE_H */ diff --git a/bgpd/bgp_rd.h b/bgpd/bgp_rd.h index b5ad9d624..2aee44c72 100644 --- a/bgpd/bgp_rd.h +++ b/bgpd/bgp_rd.h @@ -33,6 +33,7 @@ #endif #define RD_ADDRSTRLEN 28 +#define 
RD_BYTES 8 struct rd_as { uint16_t type; diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 42cb0ec8a..691d5cdec 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -99,6 +99,7 @@ enum bgp_show_adj_route_type { #define BGP_NLRI_PARSE_ERROR_FLOWSPEC_NLRI_SIZELIMIT -12 #define BGP_NLRI_PARSE_ERROR_FLOWSPEC_BAD_FORMAT -13 #define BGP_NLRI_PARSE_ERROR_ADDRESS_FAMILY -14 +#define BGP_NLRI_PARSE_ERROR_EVPN_TYPE1_SIZE -15 #define BGP_NLRI_PARSE_ERROR -32 /* Ancillary information to struct bgp_path_info, diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 80645b031..87936f1dd 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -56,11 +56,11 @@ #include "bgpd/rfapi/vnc_export_bgp.h" #endif #include "bgpd/bgp_evpn.h" -#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_labelpool.h" #include "bgpd/bgp_pbr.h" #include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_mac.h" /* All information about zebra. */ @@ -2499,17 +2499,66 @@ static void bgp_zebra_connected(struct zclient *zclient) BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(bgp, bgp->peer); } -static int bgp_zebra_process_local_es(ZAPI_CALLBACK_ARGS) +static int bgp_zebra_process_local_es_add(ZAPI_CALLBACK_ARGS) +{ + esi_t esi; + struct bgp *bgp = NULL; + struct stream *s = NULL; + char buf[ESI_STR_LEN]; + struct in_addr originator_ip; + uint8_t active; + + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) + return 0; + + s = zclient->ibuf; + stream_get(&esi, s, sizeof(esi_t)); + originator_ip.s_addr = stream_get_ipv4(s); + active = stream_getc(s); + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Rx add ESI %s originator-ip %s active %u", + esi_to_str(&esi, buf, sizeof(buf)), + inet_ntoa(originator_ip), + active); + + bgp_evpn_local_es_add(bgp, &esi, originator_ip, active); + + return 0; +} + +static int bgp_zebra_process_local_es_del(ZAPI_CALLBACK_ARGS) { esi_t esi; struct bgp *bgp = NULL; struct stream *s = NULL; char buf[ESI_STR_LEN]; - char 
buf1[INET6_ADDRSTRLEN]; - struct ipaddr originator_ip; memset(&esi, 0, sizeof(esi_t)); - memset(&originator_ip, 0, sizeof(struct ipaddr)); + bgp = bgp_lookup_by_vrf_id(vrf_id); + if (!bgp) + return 0; + + s = zclient->ibuf; + stream_get(&esi, s, sizeof(esi_t)); + + if (BGP_DEBUG(zebra, ZEBRA)) + zlog_debug("Rx del ESI %s", + esi_to_str(&esi, buf, sizeof(buf))); + + bgp_evpn_local_es_del(bgp, &esi); + + return 0; +} + +static int bgp_zebra_process_local_es_evi(ZAPI_CALLBACK_ARGS) +{ + esi_t esi; + vni_t vni; + struct bgp *bgp; + struct stream *s; + char buf[ESI_STR_LEN]; bgp = bgp_lookup_by_vrf_id(vrf_id); if (!bgp) @@ -2517,18 +2566,18 @@ static int bgp_zebra_process_local_es(ZAPI_CALLBACK_ARGS) s = zclient->ibuf; stream_get(&esi, s, sizeof(esi_t)); - stream_get(&originator_ip, s, sizeof(struct ipaddr)); + vni = stream_getl(s); if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("Rx %s ESI %s originator-ip %s", - (cmd == ZEBRA_LOCAL_ES_ADD) ? "add" : "del", - esi_to_str(&esi, buf, sizeof(buf)), - ipaddr2str(&originator_ip, buf1, sizeof(buf1))); + zlog_debug("Rx %s ESI %s VNI %u", + (cmd == ZEBRA_LOCAL_ES_EVI_ADD) ?
"add" : "del", + esi_to_str(&esi, buf, sizeof(buf)), vni); - if (cmd == ZEBRA_LOCAL_ES_ADD) - bgp_evpn_local_es_add(bgp, &esi, &originator_ip); + if (cmd == ZEBRA_LOCAL_ES_EVI_ADD) + bgp_evpn_local_es_evi_add(bgp, &esi, vni); else - bgp_evpn_local_es_del(bgp, &esi, &originator_ip); + bgp_evpn_local_es_evi_del(bgp, &esi, vni); + return 0; } @@ -2628,6 +2677,8 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS) uint8_t flags = 0; uint32_t seqnum = 0; int state = 0; + char buf2[ESI_STR_LEN]; + esi_t esi = {0}; /* not read from the stream on DEL, yet logged below */ memset(&ip, 0, sizeof(ip)); s = zclient->ibuf; @@ -2651,6 +2702,7 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS) if (cmd == ZEBRA_MACIP_ADD) { flags = stream_getc(s); seqnum = stream_getl(s); + stream_get(&esi, s, sizeof(esi_t)); } else { state = stream_getl(s); } @@ -2660,15 +2712,15 @@ static int bgp_zebra_process_local_macip(ZAPI_CALLBACK_ARGS) return 0; if (BGP_DEBUG(zebra, ZEBRA)) - zlog_debug("%u:Recv MACIP %s flags 0x%x MAC %s IP %s VNI %u seq %u state %d", + zlog_debug("%u:Recv MACIP %s f 0x%x MAC %s IP %s VNI %u seq %u state %d ESI %s", vrf_id, (cmd == ZEBRA_MACIP_ADD) ? 
"Add" : "Del", flags, prefix_mac2str(&mac, buf, sizeof(buf)), ipaddr2str(&ip, buf1, sizeof(buf1)), vni, seqnum, - state); + state, esi_to_str(&esi, buf2, sizeof(buf2))); if (cmd == ZEBRA_MACIP_ADD) return bgp_evpn_local_macip_add(bgp, vni, &mac, &ip, - flags, seqnum); + flags, seqnum, &esi); else return bgp_evpn_local_macip_del(bgp, vni, &mac, &ip, state); } @@ -2801,9 +2853,11 @@ void bgp_zebra_init(struct thread_master *master, unsigned short instance) zclient->nexthop_update = bgp_read_nexthop_update; zclient->import_check_update = bgp_read_import_check_update; zclient->fec_update = bgp_read_fec_update; - zclient->local_es_add = bgp_zebra_process_local_es; - zclient->local_es_del = bgp_zebra_process_local_es; + zclient->local_es_add = bgp_zebra_process_local_es_add; + zclient->local_es_del = bgp_zebra_process_local_es_del; zclient->local_vni_add = bgp_zebra_process_local_vni; + zclient->local_es_evi_add = bgp_zebra_process_local_es_evi; + zclient->local_es_evi_del = bgp_zebra_process_local_es_evi; zclient->local_vni_del = bgp_zebra_process_local_vni; zclient->local_macip_add = bgp_zebra_process_local_macip; zclient->local_macip_del = bgp_zebra_process_local_macip; diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 35d90d471..c9e6fd2ac 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -87,6 +87,7 @@ #include "bgpd/bgp_pbr.h" #include "bgpd/bgp_addpath.h" #include "bgpd/bgp_evpn_private.h" +#include "bgpd/bgp_evpn_mh.h" #include "bgpd/bgp_mac.h" DEFINE_MTYPE_STATIC(BGPD, PEER_TX_SHUTDOWN_MSG, "Peer shutdown message (TX)"); @@ -6939,6 +6940,7 @@ void bgp_master_init(struct thread_master *master, const int buffer_size) /* mpls label dynamic allocation pool */ bgp_lp_init(bm->master, &bm->labelpool); + bgp_evpn_mh_init(); QOBJ_REG(bm, bgp_master); } @@ -7138,6 +7140,7 @@ void bgp_terminate(void) BGP_TIMER_OFF(bm->t_rmap_update); bgp_mac_finish(); + bgp_evpn_mh_finish(); } struct peer *peer_lookup_in_view(struct vty *vty, struct bgp *bgp, diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h 
index 8eea2a5f6..966de8783 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -165,6 +165,9 @@ struct bgp_master { /* How big should we set the socket buffer size */ uint32_t socket_buffer; + /* EVPN multihoming */ + struct bgp_evpn_mh_info *mh_info; + bool terminating; /* global flag that sigint terminate seen */ QOBJ_FIELDS }; @@ -661,9 +664,6 @@ struct bgp { struct bgp_pbr_config *bgp_pbr_cfg; - /* local esi hash table */ - struct hash *esihash; - /* Count of peers in established state */ uint32_t established_peers; diff --git a/lib/prefix.h b/lib/prefix.h index 2d1e8f47d..e3dfc88d5 100644 --- a/lib/prefix.h +++ b/lib/prefix.h @@ -54,6 +54,7 @@ extern "C" { #define MAX_ESI {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} +#define EVPN_ETH_TAG_BYTES 4 #define ESI_BYTES 10 #define ESI_STR_LEN (3 * ESI_BYTES) -- 2.39.5