return;
}
if (set) {
+ if (cum_bw)
+ SET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT);
+ else
+ UNSET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT);
if (all_paths_lb)
SET_FLAG(mpath->mp_flags, BGP_MP_LB_ALL);
else
/*
* bgp_path_info_chkwtd
*
- * Given bestpath bgp_path_info, return if we should attempt to
- * do weighted ECMP or not
+ * Return whether weighted ECMP should be attempted for this route.
+ * The path passed in is the bestpath; the flags on its multipath info
+ * are checked according to bgp->lb_handling:
+ *  - BGP_LINK_BW_IGNORE_BW, or no multipath info: never weighted.
+ *  - BGP_LINK_BW_SKIP_MISSING or BGP_LINK_BW_DEFWT_4_MISSING: weighted
+ *    if BGP_MP_LB_PRESENT is set.
+ *  - otherwise: weighted only if BGP_MP_LB_ALL is set.
*/
-bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path)
+bool bgp_path_info_mpath_chkwtd(struct bgp *bgp, struct bgp_path_info *path)
{
- if (!path->mpath)
+ /* No weighted ECMP if told to ignore weights or not multipath */
+ if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW || !path->mpath)
return false;
- return (path->mpath->mp_flags & BGP_MP_LB_ALL);
+
+ /* Unless explicitly told to skip or default-weight paths without
+ * bandwidth, all paths in the multipath must have an associated
+ * weight (bandwidth).
+ */
+ if (bgp->lb_handling != BGP_LINK_BW_SKIP_MISSING &&
+ bgp->lb_handling != BGP_LINK_BW_DEFWT_4_MISSING)
+ return (path->mpath->mp_flags & BGP_MP_LB_ALL);
+
+ /* Otherwise it is enough that at least one path has bandwidth. */
+ return (path->mpath->mp_flags & BGP_MP_LB_PRESENT);
}
/*
/* Flags - relevant as noted. */
uint16_t mp_flags;
-/* Attached to best path, indicates that all multipaths have link-bandwidth */
-#define BGP_MP_LB_ALL 0x1
+#define BGP_MP_LB_PRESENT 0x1 /* Link-bandwidth present for >= 1 path */
+#define BGP_MP_LB_ALL 0x2 /* Link-bandwidth present for all multipaths */
/* Aggregated attribute for advertising multipath route */
struct attr *mp_attr;
/* Accessors for multipath information */
extern uint32_t bgp_path_info_mpath_count(struct bgp_path_info *path);
extern struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path);
-extern bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path);
+extern bool bgp_path_info_mpath_chkwtd(struct bgp *bgp,
+ struct bgp_path_info *path);
extern uint64_t bgp_path_info_mpath_cumbw(struct bgp_path_info *path);
#endif /* _QUAGGA_BGP_MPATH_H */
* been explicitly set by user policy.
*/
if (nh_reset &&
- bgp_path_info_mpath_chkwtd(pi) &&
+ bgp_path_info_mpath_chkwtd(bgp, pi) &&
(cum_bw = bgp_path_info_mpath_cumbw(pi)) != 0 &&
!CHECK_FLAG(attr->rmap_change_flags, BATTR_RMAP_LINK_BW_SET))
attr->ecommunity = ecommunity_replace_linkbw(
return CMD_SUCCESS;
}
+/* "bgp bestpath bandwidth" configuration.
+ *
+ * Sets bgp->lb_handling from the keyword given on the command line; the
+ * "no" form resets it to BGP_LINK_BW_ECMP. The positive form without a
+ * keyword is rejected as incomplete.
+ */
+DEFPY (bgp_bestpath_bw,
+ bgp_bestpath_bw_cmd,
+ "[no$no] bgp bestpath bandwidth [<ignore|skip-missing|default-weight-for-missing>$bw_cfg]",
+ NO_STR
+ "BGP specific commands\n"
+ "Change the default bestpath selection\n"
+ "Link Bandwidth attribute\n"
+ "Ignore link bandwidth (i.e., do regular ECMP, not weighted)\n"
+ "Ignore paths without link bandwidth for ECMP (if other paths have it)\n"
+ "Assign a low default weight (value 1) to paths not having link bandwidth\n")
+{
+ VTY_DECLVAR_CONTEXT(bgp, bgp);
+ afi_t afi;
+ safi_t safi;
+
+ if (no) {
+ bgp->lb_handling = BGP_LINK_BW_ECMP;
+ } else {
+ if (!bw_cfg) {
+ vty_out(vty, "%% Bandwidth configuration must be specified\n");
+ return CMD_ERR_INCOMPLETE;
+ }
+ if (!strcmp(bw_cfg, "ignore"))
+ bgp->lb_handling = BGP_LINK_BW_IGNORE_BW;
+ else if (!strcmp(bw_cfg, "skip-missing"))
+ bgp->lb_handling = BGP_LINK_BW_SKIP_MISSING;
+ else if (!strcmp(bw_cfg, "default-weight-for-missing"))
+ bgp->lb_handling = BGP_LINK_BW_DEFWT_4_MISSING;
+ else
+ return CMD_ERR_NO_MATCH;
+ }
+
+ /* This config is used in route install, so redo that: call
+ * bgp_zebra_announce_table() for every AFI/SAFI that passes
+ * bgp_fibupd_safi().
+ */
+ FOREACH_AFI_SAFI (afi, safi) {
+ if (!bgp_fibupd_safi(safi))
+ continue;
+ bgp_zebra_announce_table(bgp, afi, safi);
+ }
+
+ return CMD_SUCCESS;
+}
+
/* "no bgp default ipv4-unicast". */
DEFUN (no_bgp_default_ipv4_unicast,
no_bgp_default_ipv4_unicast_cmd,
vty_out(vty, "\n");
}
+ /* Link bandwidth handling. */
+ if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW)
+ vty_out(vty, " bgp bestpath bandwidth ignore\n");
+ else if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING)
+ vty_out(vty, " bgp bestpath bandwidth skip-missing\n");
+ else if (bgp->lb_handling == BGP_LINK_BW_DEFWT_4_MISSING)
+ vty_out(vty, " bgp bestpath bandwidth default-weight-for-missing\n");
+
/* BGP network import check. */
if (!!CHECK_FLAG(bgp->flags, BGP_FLAG_IMPORT_CHECK)
!= SAVE_BGP_IMPORT_CHECK)
install_element(BGP_NODE, &bgp_bestpath_med_cmd);
install_element(BGP_NODE, &no_bgp_bestpath_med_cmd);
+ /* "bgp bestpath bandwidth" commands */
+ install_element(BGP_NODE, &bgp_bestpath_bw_cmd);
+
/* "no bgp default ipv4-unicast" commands. */
install_element(BGP_NODE, &no_bgp_default_ipv4_unicast_cmd);
install_element(BGP_NODE, &bgp_default_ipv4_unicast_cmd);
return true;
}
-static uint32_t bgp_zebra_nhop_weight(uint32_t bw, uint64_t tot_bw)
+static bool bgp_zebra_use_nhop_weighted(struct bgp *bgp, struct attr *attr,
+ uint64_t tot_bw, uint32_t *nh_weight)
{
- uint64_t tmp = (uint64_t)bw * 100;
- return ((uint32_t)(tmp / tot_bw));
+ uint32_t bw;
+ uint64_t tmp;
+
+ bw = attr->link_bw;
+ /* Decide whether this next hop takes part in weighted ECMP.
+ * Returns true with the weight stored in *nh_weight, or false
+ * (leaving *nh_weight untouched) if the next hop is to be skipped.
+ *
+ * zero link-bandwidth and link-bandwidth not present are treated
+ * as the same situation.
+ */
+ if (!bw) {
+ /* we should only get here without bandwidth if we're
+ * either told to skip such paths or to use the default
+ * weight for them.
+ */
+ if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING)
+ return false;
+ *nh_weight = BGP_ZEBRA_DEFAULT_NHOP_WEIGHT;
+ } else {
+ tmp = (uint64_t)bw * 100; /* widen first so bw * 100 cannot wrap in 32 bits */
+ *nh_weight = ((uint32_t)(tmp / tot_bw)); /* bw as a percentage of tot_bw, truncated */
+ }
+
+ return true;
}
void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
metric = info->attr->med;
/* Determine if we're doing weighted ECMP or not */
- do_wt_ecmp = bgp_path_info_mpath_chkwtd(info);
+ do_wt_ecmp = bgp_path_info_mpath_chkwtd(bgp, info);
if (do_wt_ecmp)
cum_bw = bgp_path_info_mpath_cumbw(info);
for (mpinfo = info; mpinfo; mpinfo = bgp_path_info_mpath_next(mpinfo)) {
+ uint32_t nh_weight;
+
if (valid_nh_count >= multipath_num)
break;
*mpinfo_cp = *mpinfo;
+ nh_weight = 0;
/* Get nexthop address-family */
if (p->family == AF_INET
else
continue;
+ /* If processing for weighted ECMP, determine the next hop's
+ * weight. Based on user setting, we may skip the next hop
+ * in some situations.
+ */
+ if (do_wt_ecmp) {
+ if (!bgp_zebra_use_nhop_weighted(bgp, mpinfo->attr,
+ cum_bw, &nh_weight))
+ continue;
+ }
api_nh = &api.nexthops[valid_nh_count];
if (nh_family == AF_INET) {
if (bgp_debug_zebra(&api.prefix)) {
}
memcpy(&api_nh->rmac, &(mpinfo->attr->rmac),
sizeof(struct ethaddr));
+ api_nh->weight = nh_weight;
- /* Update next hop's weight for weighted ECMP */
- if (do_wt_ecmp)
- api_nh->weight = bgp_zebra_nhop_weight(
- mpinfo->attr->link_bw, cum_bw);
valid_nh_count++;
}
#include "vxlan.h"
+/* Default weight for next hop, if doing weighted ECMP. Applied to next
+ * hops whose path has no (or zero) link bandwidth, unless such paths
+ * are configured to be skipped.
+ */
+#define BGP_ZEBRA_DEFAULT_NHOP_WEIGHT 1
+
extern void bgp_zebra_init(struct thread_master *master,
unsigned short instance);
extern void bgp_zebra_init_tm_connect(struct bgp *bgp);
bgp->dynamic_neighbors_limit = BGP_DYNAMIC_NEIGHBORS_LIMIT_DEFAULT;
bgp->dynamic_neighbors_count = 0;
bgp->lb_ref_bw = BGP_LINK_BW_REF_BW;
+ bgp->lb_handling = BGP_LINK_BW_ECMP;
bgp->ebgp_requires_policy = DEFAULT_EBGP_POLICY_DISABLED;
bgp->reject_as_sets = BGP_REJECT_AS_SETS_DISABLED;
bgp_addpath_init_bgp_data(&bgp->tx_addpath);
#define BGP_GR_SUCCESS 0
#define BGP_GR_FAILURE 1
+/* Handling of BGP link bandwidth (LB) on receiver - whether and how to
+ * do weighted ECMP. Note: This applies after multipath computation.
+ */
+enum bgp_link_bw_handling {
+ /* Do wECMP only if all paths have LB; do regular ECMP if some
+ * paths don't have LB - default
+ */
+ BGP_LINK_BW_ECMP,
+ /* Completely ignore LB, just do regular ECMP */
+ BGP_LINK_BW_IGNORE_BW,
+ /* Skip paths without LB, do wECMP on others (needs LB on at
+ * least one path)
+ */
+ BGP_LINK_BW_SKIP_MISSING,
+ /* Do wECMP with default weight for paths not having LB (needs LB
+ * on at least one path)
+ */
+ BGP_LINK_BW_DEFWT_4_MISSING
+};
+
/* BGP instance structure. */
struct bgp {
/* AS number of this BGP instance. */
/* Count of peers in established state */
uint32_t established_peers;
+ /* Weighted ECMP related config. */
+ enum bgp_link_bw_handling lb_handling;
+
QOBJ_FIELDS
};
DECLARE_QOBJ_TYPE(bgp)