]> git.proxmox.com Git - mirror_frr.git/commitdiff
bgpd: Implement options for link bandwidth handling
authorvivek <vivek@cumulusnetworks.com>
Tue, 24 Mar 2020 21:38:37 +0000 (14:38 -0700)
committervivek <vivek@cumulusnetworks.com>
Tue, 31 Mar 2020 03:12:31 +0000 (20:12 -0700)
Support configurable options to control how link bandwidth is handled
by the receiver. The default behavior is to automatically honor the
link bandwidths received and use it to perform a weighted ECMP BUT only
if all paths in the multipath have associated link bandwidth; if one or
more paths do not have link bandwidth, normal ECMP is performed among
the multipaths. This behavior is as recommended by
https://tools.ietf.org/html/draft-ietf-idr-link-bandwidth.

The additional options available are to (a) completely ignore any link
bandwidth (i.e., weighted ECMP is effectively disabled), (b) skip paths
in the multipath which do not have link bandwidth and perform weighted
ECMP among the other paths (if at least some paths have the bandwidth)
or (c) use a default weight (value chosen is 1) for the paths which
do not have link bandwidth.

The command syntax is
bgp bestpath bandwidth <ignore|skip-missing|default-weight-for-missing>

Signed-off-by: Vivek Venkatraman <vivek@cumulusnetworks.com>
bgpd/bgp_mpath.c
bgpd/bgp_mpath.h
bgpd/bgp_route.c
bgpd/bgp_vty.c
bgpd/bgp_zebra.c
bgpd/bgp_zebra.h
bgpd/bgpd.c
bgpd/bgpd.h

index a3d40849ce7a5e5e4241b0fbabf4a40c10c028f8..f66f56cb49755d094b67e8d374614ffe37d83417 100644 (file)
@@ -419,6 +419,10 @@ static void bgp_path_info_mpath_lb_update(struct bgp_path_info *path, bool set,
                        return;
        }
        if (set) {
+               if (cum_bw)
+                       SET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT);
+               else
+                       UNSET_FLAG(mpath->mp_flags, BGP_MP_LB_PRESENT);
                if (all_paths_lb)
                        SET_FLAG(mpath->mp_flags, BGP_MP_LB_ALL);
                else
@@ -446,14 +450,24 @@ struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path)
 /*
  * bgp_path_info_chkwtd
  *
- * Given bestpath bgp_path_info, return if we should attempt to
- * do weighted ECMP or not
+ * Return if we should attempt to do weighted ECMP or not
+ * The path passed in is the bestpath.
  */
-bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path)
+bool bgp_path_info_mpath_chkwtd(struct bgp *bgp, struct bgp_path_info *path)
 {
-       if (!path->mpath)
+       /* Check if told to ignore weights or not multipath */
+       if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW || !path->mpath)
                return false;
-       return (path->mpath->mp_flags & BGP_MP_LB_ALL);
+
+       /* All paths in multipath should have associated weight (bandwidth)
+        * unless told explicitly otherwise.
+        */
+       if (bgp->lb_handling != BGP_LINK_BW_SKIP_MISSING &&
+           bgp->lb_handling != BGP_LINK_BW_DEFWT_4_MISSING)
+               return (path->mpath->mp_flags & BGP_MP_LB_ALL);
+
+       /* At least one path should have bandwidth. */
+       return (path->mpath->mp_flags & BGP_MP_LB_PRESENT);
 }
 
 /*
index 6d3df29415d981a5fdf9d17b5da231e90f066ed8..34f94b256b5bca7bcdcb9f9ecaa0f6d4083e3232 100644 (file)
@@ -40,8 +40,8 @@ struct bgp_path_info_mpath {
 
        /* Flags - relevant as noted. */
        uint16_t mp_flags;
-/* Attached to best path, indicates that all multipaths have link-bandwidth */
-#define BGP_MP_LB_ALL 0x1
+#define BGP_MP_LB_PRESENT 0x1 /* Link-bandwidth present for >= 1 path */
+#define BGP_MP_LB_ALL 0x2 /* Link-bandwidth present for all multipaths */
 
        /* Aggregated attribute for advertising multipath route */
        struct attr *mp_attr;
@@ -86,7 +86,8 @@ bgp_path_info_mpath_next(struct bgp_path_info *path);
 /* Accessors for multipath information */
 extern uint32_t bgp_path_info_mpath_count(struct bgp_path_info *path);
 extern struct attr *bgp_path_info_mpath_attr(struct bgp_path_info *path);
-extern bool bgp_path_info_mpath_chkwtd(struct bgp_path_info *path);
+extern bool bgp_path_info_mpath_chkwtd(struct bgp *bgp,
+                                      struct bgp_path_info *path);
 extern uint64_t bgp_path_info_mpath_cumbw(struct bgp_path_info *path);
 
 #endif /* _QUAGGA_BGP_MPATH_H */
index 8bb4bb075b14460e0d0e9d494b674e33c1f0787a..2009375c8934a4947115b142de9de691fdc7945d 100644 (file)
@@ -2066,7 +2066,7 @@ bool subgroup_announce_check(struct bgp_node *rn, struct bgp_path_info *pi,
         * been explicitly set by user policy.
         */
        if (nh_reset &&
-           bgp_path_info_mpath_chkwtd(pi) &&
+           bgp_path_info_mpath_chkwtd(bgp, pi) &&
            (cum_bw = bgp_path_info_mpath_cumbw(pi)) != 0 &&
            !CHECK_FLAG(attr->rmap_change_flags, BATTR_RMAP_LINK_BW_SET))
                attr->ecommunity = ecommunity_replace_linkbw(
index aea6dbbf4e143e049fa5ccfdb84019256a5a4a24..544bee2a5495957485d00b7de94add7aa133b8e7 100644 (file)
@@ -2973,6 +2973,49 @@ DEFUN (no_bgp_bestpath_med,
        return CMD_SUCCESS;
 }
 
+/* "bgp bestpath bandwidth" configuration. */
+DEFPY (bgp_bestpath_bw,
+       bgp_bestpath_bw_cmd,
+       "[no$no] bgp bestpath bandwidth [<ignore|skip-missing|default-weight-for-missing>$bw_cfg]",
+       NO_STR
+       "BGP specific commands\n"
+       "Change the default bestpath selection\n"
+       "Link Bandwidth attribute\n"
+       "Ignore link bandwidth (i.e., do regular ECMP, not weighted)\n"
+       "Ignore paths without link bandwidth for ECMP (if other paths have it)\n"
+       "Assign a low default weight (value 1) to paths not having link bandwidth\n")
+{
+       VTY_DECLVAR_CONTEXT(bgp, bgp);
+       afi_t afi;
+       safi_t safi;
+
+       if (no) {
+               bgp->lb_handling = BGP_LINK_BW_ECMP;
+       } else {
+               if (!bw_cfg) {
+                       vty_out(vty, "%% Bandwidth configuration must be specified\n");
+                       return CMD_ERR_INCOMPLETE;
+               }
+               if (!strcmp(bw_cfg, "ignore"))
+                       bgp->lb_handling = BGP_LINK_BW_IGNORE_BW;
+               else if (!strcmp(bw_cfg, "skip-missing"))
+                       bgp->lb_handling = BGP_LINK_BW_SKIP_MISSING;
+               else if (!strcmp(bw_cfg, "default-weight-for-missing"))
+                       bgp->lb_handling = BGP_LINK_BW_DEFWT_4_MISSING;
+               else
+                       return CMD_ERR_NO_MATCH;
+       }
+
+       /* This config is used in route install, so redo that. */
+       FOREACH_AFI_SAFI (afi, safi) {
+               if (!bgp_fibupd_safi(safi))
+                       continue;
+               bgp_zebra_announce_table(bgp, afi, safi);
+       }
+
+       return CMD_SUCCESS;
+}
+
 /* "no bgp default ipv4-unicast". */
 DEFUN (no_bgp_default_ipv4_unicast,
        no_bgp_default_ipv4_unicast_cmd,
@@ -15161,6 +15204,14 @@ int bgp_config_write(struct vty *vty)
                        vty_out(vty, "\n");
                }
 
+               /* Link bandwidth handling. */
+               if (bgp->lb_handling == BGP_LINK_BW_IGNORE_BW)
+                       vty_out(vty, " bgp bestpath bandwidth ignore\n");
+               else if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING)
+                       vty_out(vty, " bgp bestpath bandwidth skip-missing\n");
+               else if (bgp->lb_handling == BGP_LINK_BW_DEFWT_4_MISSING)
+                       vty_out(vty, " bgp bestpath bandwidth default-weight-for-missing\n");
+
                /* BGP network import check. */
                if (!!CHECK_FLAG(bgp->flags, BGP_FLAG_IMPORT_CHECK)
                    != SAVE_BGP_IMPORT_CHECK)
@@ -15567,6 +15618,9 @@ void bgp_vty_init(void)
        install_element(BGP_NODE, &bgp_bestpath_med_cmd);
        install_element(BGP_NODE, &no_bgp_bestpath_med_cmd);
 
+       /* "bgp bestpath bandwidth" commands */
+       install_element(BGP_NODE, &bgp_bestpath_bw_cmd);
+
        /* "no bgp default ipv4-unicast" commands. */
        install_element(BGP_NODE, &no_bgp_default_ipv4_unicast_cmd);
        install_element(BGP_NODE, &bgp_default_ipv4_unicast_cmd);
index db99811d5ce6dc8d1aca1232d43bcd033b8d14d1..b01bb788a543f0fc351f56704fe2283ed1e78576 100644 (file)
@@ -1148,10 +1148,29 @@ static bool update_ipv6nh_for_route_install(int nh_othervrf, struct bgp *nh_bgp,
        return true;
 }
 
-static uint32_t bgp_zebra_nhop_weight(uint32_t bw, uint64_t tot_bw)
+static bool bgp_zebra_use_nhop_weighted(struct bgp *bgp, struct attr *attr,
+                                       uint64_t tot_bw, uint32_t *nh_weight)
 {
-       uint64_t tmp = (uint64_t)bw * 100;
-       return ((uint32_t)(tmp / tot_bw));
+       uint32_t bw;
+       uint64_t tmp;
+
+       bw = attr->link_bw;
+       /* zero link-bandwidth and link-bandwidth not present are treated
+        * as the same situation.
+        */
+       if (!bw) {
+               /* the only situations should be if we're either told
+                * to skip or use default weight.
+                */
+               if (bgp->lb_handling == BGP_LINK_BW_SKIP_MISSING)
+                       return false;
+               *nh_weight = BGP_ZEBRA_DEFAULT_NHOP_WEIGHT;
+       } else {
+               tmp = (uint64_t)bw * 100;
+               *nh_weight = ((uint32_t)(tmp / tot_bw));
+       }
+
+       return true;
 }
 
 void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
@@ -1250,15 +1269,18 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
        metric = info->attr->med;
 
        /* Determine if we're doing weighted ECMP or not */
-       do_wt_ecmp = bgp_path_info_mpath_chkwtd(info);
+       do_wt_ecmp = bgp_path_info_mpath_chkwtd(bgp, info);
        if (do_wt_ecmp)
                cum_bw = bgp_path_info_mpath_cumbw(info);
 
        for (mpinfo = info; mpinfo; mpinfo = bgp_path_info_mpath_next(mpinfo)) {
+               uint32_t nh_weight;
+
                if (valid_nh_count >= multipath_num)
                        break;
 
                *mpinfo_cp = *mpinfo;
+               nh_weight = 0;
 
                /* Get nexthop address-family */
                if (p->family == AF_INET
@@ -1271,6 +1293,15 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
                else
                        continue;
 
+               /* If processing for weighted ECMP, determine the next hop's
+                * weight. Based on user setting, we may skip the next hop
+                * in some situations.
+                */
+               if (do_wt_ecmp) {
+                       if (!bgp_zebra_use_nhop_weighted(bgp, mpinfo->attr,
+                                                        cum_bw, &nh_weight))
+                               continue;
+               }
                api_nh = &api.nexthops[valid_nh_count];
                if (nh_family == AF_INET) {
                        if (bgp_debug_zebra(&api.prefix)) {
@@ -1370,11 +1401,8 @@ void bgp_zebra_announce(struct bgp_node *rn, const struct prefix *p,
                }
                memcpy(&api_nh->rmac, &(mpinfo->attr->rmac),
                       sizeof(struct ethaddr));
+               api_nh->weight = nh_weight;
 
-               /* Update next hop's weight for weighted ECMP */
-               if (do_wt_ecmp)
-                       api_nh->weight = bgp_zebra_nhop_weight(
-                               mpinfo->attr->link_bw, cum_bw);
                valid_nh_count++;
        }
 
index e546cd5da7da035be20887c39b68fec0eaec710b..a069d01503b5effaaec271cc2f2cd4ee637b45af 100644 (file)
@@ -23,6 +23,9 @@
 
 #include "vxlan.h"
 
+/* Default weight for next hop, if doing weighted ECMP. */
+#define BGP_ZEBRA_DEFAULT_NHOP_WEIGHT 1
+
 extern void bgp_zebra_init(struct thread_master *master,
                           unsigned short instance);
 extern void bgp_zebra_init_tm_connect(struct bgp *bgp);
index d1363b70a4c68f24b52912abd2ca8485b428a133..3124efbb65888c0a65369b36b8409630ea6a715c 100644 (file)
@@ -2971,6 +2971,7 @@ static struct bgp *bgp_create(as_t *as, const char *name,
        bgp->dynamic_neighbors_limit = BGP_DYNAMIC_NEIGHBORS_LIMIT_DEFAULT;
        bgp->dynamic_neighbors_count = 0;
        bgp->lb_ref_bw = BGP_LINK_BW_REF_BW;
+       bgp->lb_handling = BGP_LINK_BW_ECMP;
        bgp->ebgp_requires_policy = DEFAULT_EBGP_POLICY_DISABLED;
        bgp->reject_as_sets = BGP_REJECT_AS_SETS_DISABLED;
        bgp_addpath_init_bgp_data(&bgp->tx_addpath);
index 24ea66b5a1819013389e13f249cd4f0e779ac7b1..f6f9687783720b169e89db75462bd1042fd7e8f4 100644 (file)
@@ -284,6 +284,20 @@ enum global_gr_command {
 #define BGP_GR_SUCCESS 0
 #define BGP_GR_FAILURE 1
 
+/* Handling of BGP link bandwidth (LB) on receiver - whether and how to
+ * do weighted ECMP. Note: This applies after multipath computation.
+ */
+enum bgp_link_bw_handling {
+       /* Do ECMP if some paths don't have LB - default */
+       BGP_LINK_BW_ECMP,
+       /* Completely ignore LB, just do regular ECMP */
+       BGP_LINK_BW_IGNORE_BW,
+       /* Skip paths without LB, do wECMP on others */
+       BGP_LINK_BW_SKIP_MISSING,
+       /* Do wECMP with default weight for paths not having LB */
+       BGP_LINK_BW_DEFWT_4_MISSING
+};
+
 /* BGP instance structure.  */
 struct bgp {
        /* AS number of this BGP instance.  */
@@ -658,6 +672,9 @@ struct bgp {
        /* Count of peers in established state */
        uint32_t established_peers;
 
+       /* Weighted ECMP related config. */
+       enum bgp_link_bw_handling lb_handling;
+
        QOBJ_FIELDS
 };
 DECLARE_QOBJ_TYPE(bgp)