#include "bgpd/bgp_nexthop.h"
#include "bgpd/bgp_addpath.h"
#include "bgpd/bgp_mac.h"
+#include "bgpd/bgp_vty.h"
/*
* Definitions and external declarations.
}
}
+static void bgp_evpn_get_rmac_nexthop(struct bgpevpn *vpn,
+ struct prefix_evpn *p,
+ struct attr *attr, uint8_t flags)
+{
+ struct bgp *bgp_vrf = vpn->bgp_vrf;
+
+ memset(&attr->rmac, 0, sizeof(struct ethaddr));
+ if (!bgp_vrf)
+ return;
+
+ if (p->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
+ return;
+
+ /* Copy sys (pip) RMAC and PIP IP as nexthop
+ * in case of route is self MAC-IP,
+ * advertise-pip and advertise-svi-ip features
+ * are enabled.
+ * Otherwise, for all host MAC-IP route's
+ * copy anycast RMAC.
+ */
+ if (CHECK_FLAG(flags, BGP_EVPN_MACIP_TYPE_SVI_IP)
+ && bgp_vrf->evpn_info->advertise_pip &&
+ bgp_vrf->evpn_info->is_anycast_mac) {
+ /* copy sys rmac */
+ memcpy(&attr->rmac, &bgp_vrf->evpn_info->pip_rmac,
+ ETH_ALEN);
+ attr->nexthop = bgp_vrf->evpn_info->pip_ip;
+ attr->mp_nexthop_global_in =
+ bgp_vrf->evpn_info->pip_ip;
+ } else
+ memcpy(&attr->rmac, &bgp_vrf->rmac, ETH_ALEN);
+}
+
/*
* Create RT extended community automatically from passed information:
* of the form AS:VNI.
* present, else treat as locally originated.
*/
if (src_attr)
- bgp_attr_dup(&attr, src_attr);
+ attr = *src_attr;
else {
memset(&attr, 0, sizeof(struct attr));
bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
}
- /* Set nexthop to ourselves and fill in the Router MAC. */
- attr.nexthop = bgp_vrf->originator_ip;
- attr.mp_nexthop_global_in = bgp_vrf->originator_ip;
+
+ /* Advertise Primary IP (PIP) is enabled, send individual
+ * IP (default instance router-id) as nexthop.
+ * PIP is disabled or vrr interface is not present
+ * use anycast-IP as nexthop and anycast RMAC.
+ */
+ if (!bgp_vrf->evpn_info->advertise_pip ||
+ (!bgp_vrf->evpn_info->is_anycast_mac)) {
+ attr.nexthop = bgp_vrf->originator_ip;
+ attr.mp_nexthop_global_in = bgp_vrf->originator_ip;
+ memcpy(&attr.rmac, &bgp_vrf->rmac, ETH_ALEN);
+ } else {
+ /* copy sys rmac */
+ memcpy(&attr.rmac, &bgp_vrf->evpn_info->pip_rmac, ETH_ALEN);
+ if (bgp_vrf->evpn_info->pip_ip.s_addr != INADDR_ANY) {
+ attr.nexthop = bgp_vrf->evpn_info->pip_ip;
+ attr.mp_nexthop_global_in = bgp_vrf->evpn_info->pip_ip;
+ } else if (bgp_vrf->evpn_info->pip_ip.s_addr == INADDR_ANY)
+ if (bgp_debug_zebra(NULL)) {
+ char buf1[PREFIX_STRLEN];
+
+ zlog_debug("VRF %s evp %s advertise-pip primary ip is not configured",
+ vrf_id_to_name(bgp_vrf->vrf_id),
+ prefix2str(evp, buf1, sizeof(buf1)));
+ }
+ }
+
+ if (bgp_debug_zebra(NULL)) {
+ char buf[ETHER_ADDR_STRLEN];
+ char buf1[PREFIX_STRLEN];
+ char buf2[INET6_ADDRSTRLEN];
+
+ zlog_debug("VRF %s type-5 route evp %s RMAC %s nexthop %s",
+ vrf_id_to_name(bgp_vrf->vrf_id),
+ prefix2str(evp, buf1, sizeof(buf1)),
+ prefix_mac2str(&attr.rmac, buf, sizeof(buf)),
+ inet_ntop(AF_INET, &attr.nexthop, buf2,
+ INET_ADDRSTRLEN));
+ }
+
attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
- memcpy(&attr.rmac, &bgp_vrf->rmac, sizeof(struct ethaddr));
/* Setup RT and encap extended community */
build_evpn_type5_route_extcomm(bgp_vrf, &attr);
memcpy(&tmp_pi->extra->label, label, sizeof(label));
tmp_pi->extra->num_labels = num_labels;
+ /* Mark route as self type-2 route */
+ if (flags && CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP))
+ tmp_pi->extra->af_flags = BGP_EVPN_MACIP_TYPE_SVI_IP;
bgp_path_info_add(rn, tmp_pi);
} else {
tmp_pi = local_pi;
}
/* router mac is only needed for type-2 routes here. */
- if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
- bgpevpn_get_rmac(vpn, &attr.rmac);
+ if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) {
+ uint8_t af_flags = 0;
+
+ if (CHECK_FLAG(flags, ZEBRA_MACIP_TYPE_SVI_IP))
+ SET_FLAG(af_flags, BGP_EVPN_MACIP_TYPE_SVI_IP);
+
+ bgp_evpn_get_rmac_nexthop(vpn, p, &attr, af_flags);
+
+ if (bgp_debug_zebra(NULL)) {
+ char buf[ETHER_ADDR_STRLEN];
+ char buf1[PREFIX_STRLEN];
+
+ zlog_debug("VRF %s vni %u type-2 route evp %s RMAC %s nexthop %s",
+ vpn->bgp_vrf ?
+ vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ",
+ vpn->vni,
+ prefix2str(p, buf1, sizeof(buf1)),
+ prefix_mac2str(&attr.rmac, buf,
+ sizeof(buf)),
+ inet_ntoa(attr.mp_nexthop_global_in));
+ }
+ }
+
vni2label(vpn->vni, &(attr.label));
/* Include L3 VNI related RTs and RMAC for type-2 routes, if they're
/* lock ri to prevent freeing in evpn_route_select_install */
bgp_path_info_lock(pi);
- /* Perform route selection; this is just to set the flags correctly
- * as local route in the VNI always wins.
- */
+
+ /* Perform route selection. Normally, the local route in the
+ * VNI is expected to win and be the best route. However, if
+ * there is a race condition where a host moved from local to
+ * remote and the remote route was received in BGP just prior
+ * to the local MACIP notification from zebra, the remote
+ * route would win, and we should evict the defunct local route
+ * and (re)install the remote route into zebra.
+ */
evpn_route_select_install(bgp, vpn, rn);
/*
* If the new local route was not selected evict it and tell zebra
attr.nexthop = vpn->originator_ip;
attr.mp_nexthop_global_in = vpn->originator_ip;
attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
- bgpevpn_get_rmac(vpn, &attr.rmac);
+ bgp_evpn_get_rmac_nexthop(vpn, evp, &attr,
+ tmp_pi->extra->af_flags);
if (evpn_route_is_sticky(bgp, rn))
attr.sticky = 1;
attr.router_flag = 1;
}
+ if (bgp_debug_zebra(NULL)) {
+ char buf[ETHER_ADDR_STRLEN];
+ char buf1[PREFIX_STRLEN];
+
+ zlog_debug("VRF %s vni %u evp %s RMAC %s nexthop %s",
+ vpn->bgp_vrf ?
+ vrf_id_to_name(vpn->bgp_vrf->vrf_id) : " ",
+ vpn->vni,
+ prefix2str(evp, buf1, sizeof(buf1)),
+ prefix_mac2str(&attr.rmac, buf, sizeof(buf)),
+ inet_ntoa(attr.mp_nexthop_global_in));
+ }
+
/* Add L3 VNI RTs and RMAC for non IPv6 link-local if
* using L3 VNI for type-2 routes also.
*/
update_evpn_route_entry(bgp, vpn, afi, safi, rn, &attr, 0, &pi,
0, seq);
- /* Perform route selection; this is just to set the flags
- * correctly as local route in the VNI always wins.
+ /* lock ri to prevent freeing in evpn_route_select_install */
+ bgp_path_info_lock(pi);
+
+ /* Perform route selection. Normally, the local route in the
+ * VNI is expected to win and be the best route. However,
+ * under peculiar situations (e.g., tunnel (next hop) IP change
+ * that causes best selection to be based on next hop), a
+ * remote route could win. If the local route is the best,
+ * ensure it is updated in the global EVPN route table and
+ * advertised to peers; otherwise, ensure it is evicted and
+ * (re)install the remote route into zebra.
*/
evpn_route_select_install(bgp, vpn, rn);
-
- attr_new = pi->attr;
-
- /* Update route in global routing table. */
- rd_rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
- (struct prefix *)evp, &vpn->prd);
- assert(rd_rn);
- update_evpn_route_entry(bgp, vpn, afi, safi, rd_rn, attr_new, 0,
- &global_pi, 0,
- mac_mobility_seqnum(attr_new));
-
- /* Schedule for processing and unlock node. */
- bgp_process(bgp, rd_rn, afi, safi);
- bgp_unlock_node(rd_rn);
+ if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
+ evpn_cleanup_local_non_best_route(bgp, vpn, rn, pi);
+ /* unlock pi */
+ bgp_path_info_unlock(pi);
+ } else {
+ attr_new = pi->attr;
+ /* unlock pi */
+ bgp_path_info_unlock(pi);
+
+ /* Update route in global routing table. */
+ rd_rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
+ (struct prefix *)evp, &vpn->prd);
+ assert(rd_rn);
+ update_evpn_route_entry(bgp, vpn, afi, safi, rd_rn,
+ attr_new, 0, &global_pi, 0,
+ mac_mobility_seqnum(attr_new));
+
+ /* Schedule for processing and unlock node. */
+ bgp_process(bgp, rd_rn, afi, safi);
+ bgp_unlock_node(rd_rn);
+ }
/* Unintern temporary. */
aspath_unintern(&attr.aspath);
* situations need the route in the per-VNI table as well as the global
* table to be updated (as attributes change).
*/
-static int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
+int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
{
int ret;
struct prefix_evpn p;
static struct bgp_path_info *
bgp_create_evpn_bgp_path_info(struct bgp_path_info *parent_pi,
- struct bgp_node *rn)
+ struct bgp_node *rn, struct attr *attr)
{
struct attr *attr_new;
struct bgp_path_info *pi;
/* Add (or update) attribute to hash. */
- attr_new = bgp_attr_intern(parent_pi->attr);
+ attr_new = bgp_attr_intern(attr);
/* Create new route with its attribute. */
pi = info_make(parent_pi->type, BGP_ROUTE_IMPORTED, 0, parent_pi->peer,
* address for the rest of the code to flow through. In the case of IPv4,
* make sure to set the flag for next hop attribute.
*/
- bgp_attr_dup(&attr, parent_pi->attr);
+ attr = *parent_pi->attr;
if (afi == AFI_IP6)
evpn_convert_nexthop_to_ipv6(&attr);
else
break;
if (!pi)
- pi = bgp_create_evpn_bgp_path_info(parent_pi, rn);
+ pi = bgp_create_evpn_bgp_path_info(parent_pi, rn, &attr);
else {
if (attrhash_cmp(pi->attr, &attr)
&& !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
pi->attr = attr_new;
pi->uptime = bgp_clock();
}
+ /* as it is an importation, change nexthop */
+ bgp_path_info_set_flag(rn, pi, BGP_PATH_ANNC_NH_SELF);
bgp_aggregate_increment(bgp_vrf, &rn->p, pi, afi, safi);
if (!pi) {
/* Create an info */
- (void)bgp_create_evpn_bgp_path_info(parent_pi, rn);
+ (void)bgp_create_evpn_bgp_path_info(parent_pi, rn,
+ parent_pi->attr);
} else {
if (attrhash_cmp(pi->attr, parent_pi->attr)
&& !CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
* update and advertise all ipv4 and ipv6 routes in thr vrf table as type-5
* routes
*/
-static void update_advertise_vrf_routes(struct bgp *bgp_vrf)
+void update_advertise_vrf_routes(struct bgp *bgp_vrf)
{
+ struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */
+
+ bgp_evpn = bgp_get_evpn();
+ if (!bgp_evpn)
+ return;
+
/* update all ipv4 routes */
if (advertise_type5_routes(bgp_vrf, AFI_IP))
bgp_evpn_advertise_type5_routes(bgp_vrf, AFI_IP, SAFI_UNICAST);
uint32_t eth_tag;
mpls_label_t label; /* holds the VNI as in the packet */
int ret;
+ afi_t gw_afi;
+ bool is_valid_update = false;
/* Type-5 route should be 34 or 58 bytes:
* RD (8), ESI (10), Eth Tag (4), IP len (1), IP (4 or 16),
pfx += 4;
memcpy(&evpn.gw_ip.ipv4, pfx, 4);
pfx += 4;
+ gw_afi = AF_INET;
} else {
SET_IPADDR_V6(&p.prefix.prefix_addr.ip);
memcpy(&p.prefix.prefix_addr.ip.ipaddr_v6, pfx, 16);
pfx += 16;
memcpy(&evpn.gw_ip.ipv6, pfx, 16);
pfx += 16;
+ gw_afi = AF_INET6;
}
/* Get the VNI (in MPLS label field). Stored as bytes here. */
* field
*/
+ if (attr) {
+ is_valid_update = true;
+ if (is_zero_mac(&attr->rmac) && is_zero_esi(&evpn.eth_s_id) &&
+ is_zero_gw_ip(&evpn.gw_ip, gw_afi))
+ is_valid_update = false;
+
+ if (is_mcast_mac(&attr->rmac) || is_bcast_mac(&attr->rmac))
+ is_valid_update = false;
+ }
+
/* Process the route. */
- if (attr)
+ if (is_valid_update)
ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
&prd, &label, 1, 0, &evpn);
*/
void bgp_evpn_handle_router_id_update(struct bgp *bgp, int withdraw)
{
+ struct listnode *node;
+ struct bgp *bgp_vrf;
+
if (withdraw) {
/* delete and withdraw all the type-5 routes
(void (*)(struct hash_bucket *,
void *))withdraw_router_id_vni,
bgp);
+
+ if (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) {
+ for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) {
+ if (bgp_vrf->evpn_info->advertise_pip &&
+ (bgp_vrf->evpn_info->pip_ip_static.s_addr
+ == INADDR_ANY))
+ bgp_vrf->evpn_info->pip_ip.s_addr
+ = INADDR_ANY;
+ }
+ }
} else {
+ /* Assign new default instance router-id */
+ if (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) {
+ for (ALL_LIST_ELEMENTS_RO(bm->bgp, node, bgp_vrf)) {
+ if (bgp_vrf->evpn_info->advertise_pip &&
+ (bgp_vrf->evpn_info->pip_ip_static.s_addr
+ == INADDR_ANY)) {
+ bgp_vrf->evpn_info->pip_ip =
+ bgp->router_id;
+ /* advertise type-5 routes with
+ * new nexthop
+ */
+ update_advertise_vrf_routes(bgp_vrf);
+ }
+ }
+ }
+
/* advertise all routes in the vrf as type-5 routes with the new
* RD
*/
if (pnt + BGP_ADDPATH_ID_LEN > lim)
return BGP_NLRI_PARSE_ERROR_PACKET_OVERFLOW;
- addpath_id = ntohl(*((uint32_t *)pnt));
+ memcpy(&addpath_id, pnt, BGP_ADDPATH_ID_LEN);
+ addpath_id = ntohl(addpath_id);
pnt += BGP_ADDPATH_ID_LEN;
}
if (!(pi->type == ZEBRA_ROUTE_BGP
&& pi->sub_type == BGP_ROUTE_NORMAL))
continue;
-
- if (bgp_nexthop_self(bgp, pi->attr->nexthop)) {
+ if (bgp_nexthop_self(bgp, afi,
+ pi->type, pi->sub_type,
+ pi->attr, rn)) {
char attr_str[BUFSIZ] = {0};
char pbuf[PREFIX_STRLEN];
bgpevpn_link_to_l3vni(vpn);
}
-int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id, struct ethaddr *rmac,
+int bgp_evpn_local_l3vni_add(vni_t l3vni, vrf_id_t vrf_id,
+ struct ethaddr *svi_rmac,
+ struct ethaddr *vrr_rmac,
struct in_addr originator_ip, int filter,
- ifindex_t svi_ifindex)
+ ifindex_t svi_ifindex,
+ bool is_anycast_mac)
{
struct bgp *bgp_vrf = NULL; /* bgp VRF instance */
struct bgp *bgp_evpn = NULL; /* EVPN bgp instance */
int ret = 0;
- ret = bgp_get(&bgp_vrf, &as, vrf_id_to_name(vrf_id),
- vrf_id == VRF_DEFAULT ? BGP_INSTANCE_TYPE_DEFAULT
- : BGP_INSTANCE_TYPE_VRF);
+ ret = bgp_get_vty(&bgp_vrf, &as, vrf_id_to_name(vrf_id),
+ vrf_id == VRF_DEFAULT
+ ? BGP_INSTANCE_TYPE_DEFAULT
+ : BGP_INSTANCE_TYPE_VRF);
switch (ret) {
case BGP_ERR_AS_MISMATCH:
flog_err(EC_BGP_EVPN_AS_MISMATCH,
/* associate the vrf with l3vni and related parameters */
bgp_vrf->l3vni = l3vni;
- memcpy(&bgp_vrf->rmac, rmac, sizeof(struct ethaddr));
bgp_vrf->originator_ip = originator_ip;
bgp_vrf->l3vni_svi_ifindex = svi_ifindex;
+ bgp_vrf->evpn_info->is_anycast_mac = is_anycast_mac;
+ /* copy anycast MAC from VRR MAC */
+ memcpy(&bgp_vrf->rmac, vrr_rmac, ETH_ALEN);
+ /* copy sys RMAC from SVI MAC */
+ memcpy(&bgp_vrf->evpn_info->pip_rmac_zebra, svi_rmac, ETH_ALEN);
+ /* PIP user configured mac is not present use svi mac as sys mac */
+ if (is_zero_mac(&bgp_vrf->evpn_info->pip_rmac_static))
+ memcpy(&bgp_vrf->evpn_info->pip_rmac, svi_rmac, ETH_ALEN);
+
+ if (bgp_debug_zebra(NULL)) {
+ char buf[ETHER_ADDR_STRLEN];
+ char buf1[ETHER_ADDR_STRLEN];
+ char buf2[ETHER_ADDR_STRLEN];
+
+ zlog_debug("VRF %s vni %u pip %s RMAC %s sys RMAC %s static RMAC %s is_anycast_mac %s",
+ vrf_id_to_name(bgp_vrf->vrf_id),
+ bgp_vrf->l3vni,
+ bgp_vrf->evpn_info->advertise_pip ? "enable"
+ : "disable",
+ prefix_mac2str(&bgp_vrf->rmac, buf, sizeof(buf)),
+ prefix_mac2str(&bgp_vrf->evpn_info->pip_rmac,
+ buf1, sizeof(buf1)),
+ prefix_mac2str(&bgp_vrf->evpn_info->pip_rmac_static,
+ buf2, sizeof(buf2)),
+ is_anycast_mac ? "Enable" : "Disable");
+ }
/* set the right filter - are we using l3vni only for prefix routes? */
if (filter)
SET_FLAG(bgp_vrf->vrf_flags, BGP_VRF_L3VNI_PREFIX_ROUTES_ONLY);
/* remove the Rmac from the BGP vrf */
memset(&bgp_vrf->rmac, 0, sizeof(struct ethaddr));
+ memset(&bgp_vrf->evpn_info->pip_rmac_zebra, 0, ETH_ALEN);
+ if (is_zero_mac(&bgp_vrf->evpn_info->pip_rmac_static) &&
+ !is_zero_mac(&bgp_vrf->evpn_info->pip_rmac))
+ memset(&bgp_vrf->evpn_info->pip_rmac, 0, ETH_ALEN);
/* remove default import RT or Unmap non-default import RT */
if (!list_isempty(bgp_vrf->vrf_import_rtl)) {
bgp->evpn_info->dad_freeze_time = 0;
/* Initialize zebra vxlan */
bgp_zebra_dup_addr_detection(bgp);
+ /* Enable PIP feature by default for bgp vrf instance */
+ if (bgp->inst_type == BGP_INSTANCE_TYPE_VRF) {
+ struct bgp *bgp_default;
+
+ bgp->evpn_info->advertise_pip = true;
+ bgp_default = bgp_get_default();
+ if (bgp_default)
+ bgp->evpn_info->pip_ip = bgp_default->router_id;
+ }
}
/* Default BUM handling is to do head-end replication. */
return evp->prefix.prefix_addr.ip_prefix_length;
}
+
+/*
+ * Should we register nexthop for this EVPN prefix for nexthop tracking?
+ */
+bool bgp_evpn_is_prefix_nht_supported(struct prefix *pfx)
+{
+ struct prefix_evpn *evp = (struct prefix_evpn *)pfx;
+
+ /*
+ * EVPN RT-5 should not be marked as valid and imported to vrfs if the
+ * BGP nexthop is not reachable. To check for the nexthop reachability,
+ * Add nexthop for EVPN RT-5 for nexthop tracking.
+ *
+ * Ideally, a BGP route should be marked as valid only if the
+ * nexthop is reachable. Thus, other EVPN route types also should be
+ * added here after testing is performed for them.
+ */
+ if (pfx && pfx->family == AF_EVPN &&
+ evp->prefix.route_type == BGP_EVPN_IP_PREFIX_ROUTE)
+ return true;
+
+ return false;
+}