From: Donald Sharp Date: Wed, 20 May 2015 00:58:12 +0000 (-0700) Subject: Changes to improve BGP convergence time: X-Git-Tag: frr-2.0-rc1~1494 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=d889623f1ac5e14d761a0cb7896d33cfdc4f3615;p=mirror_frr.git Changes to improve BGP convergence time: - Schedule write thread for advertisements and withdraws only if corresponding FIFOs are growing and/or upon work_queue getting fully processed. - Set non-default yield time for the main work_queue, as the default value of 10ms results in yielding after processing very few nodes. - Remove unnecessary scheduling of write thread when update packet is formed. - If MRAI is 0, don't start a timer unnecessarily, directly schedule write thread. - Some debugs. --- diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index 9043c83b2..c6b4e5920 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_route.h" #include "bgpd/bgp_advertise.h" #include "bgpd/bgp_attr.h" +#include "bgpd/bgp_debug.h" #include "bgpd/bgp_aspath.h" #include "bgpd/bgp_packet.h" #include "bgpd/bgp_fsm.h" @@ -267,10 +268,24 @@ bgp_adj_out_set (struct bgp_node *rn, struct peer *peer, struct prefix *p, /* Add new advertisement to advertisement attribute list. */ bgp_advertise_add (adv->baa, adv); - if (FIFO_EMPTY(&peer->sync[afi][safi]->update)) - bgp_adjust_routeadv(peer); - BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->update, &adv->fifo); + + /* + * Schedule write thread (by triggering adjustment of MRAI timer) only if + * update FIFO has grown. Otherwise, it will be done upon the work queue + * being fully processed. Only adjust timer if needed. + */ + if (!BGP_ROUTE_ADV_HOLD(peer->bgp) && + (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->update) >= + peer->bgp->adv_quanta)) + { + if (!peer->radv_adjusted) + { + if (BGP_DEBUG (events, EVENTS)) + zlog_debug("%s scheduling MRAI timer after adj_out_set", peer->host); + bgp_adjust_routeadv(peer); + } + } } void @@ -306,8 +321,22 @@ bgp_adj_out_unset (struct bgp_node *rn, struct peer *peer, struct prefix *p, /* Add to synchronization entry for withdraw announcement. */ BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->withdraw, &adv->fifo); - /* Schedule packet write. */ - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + /* + * Schedule write thread only if withdraw FIFO has grown. Otherwise, + * it will be done upon the work queue being fully processed. + */ + if (!BGP_ROUTE_ADV_HOLD(peer->bgp) && + (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->withdraw) >= + peer->bgp->wd_quanta)) + { + if (!peer->t_write) + { + if (BGP_DEBUG (events, EVENTS)) + zlog_debug("%s scheduling write thread after adj_out_unset", + peer->host); + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + } + } } else { diff --git a/bgpd/bgp_advertise.h b/bgpd/bgp_advertise.h index 672d42d8c..36ab57698 100644 --- a/bgpd/bgp_advertise.h +++ b/bgpd/bgp_advertise.h @@ -146,6 +146,9 @@ struct bgp_synchronize (F)->count = 0; \ } while (0) +#define BGP_ADV_FIFO_COUNT(F) \ + (F)->count + /* Prototypes. */ extern void bgp_adj_out_set (struct bgp_node *, struct peer *, struct prefix *, struct attr *, afi_t, safi_t, struct bgp_info *); diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index b617fe64c..6f911c26e 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -1526,6 +1526,7 @@ bgp_mp_reach_parse (struct bgp_attr_parser_args *args, bgp_size_t nlri_len; size_t start; int ret; + int num_mp_pfx = 0; struct stream *s; struct peer *const peer = args->peer; struct attr *const attr = args->attr; @@ -1628,7 +1629,8 @@ bgp_mp_reach_parse (struct bgp_attr_parser_args *args, if (safi != SAFI_MPLS_LABELED_VPN) { - ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), nlri_len); + ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), nlri_len, + &num_mp_pfx); if (ret < 0) { zlog_info ("%s: (%s) NLRI doesn't pass sanity check", @@ -1658,6 +1660,7 @@ bgp_mp_unreach_parse (struct bgp_attr_parser_args *args, safi_t safi; u_int16_t withdraw_len; int ret; + int num_mp_pfx = 0; struct peer *const peer = args->peer; const bgp_size_t length = args->length; @@ -1674,7 +1677,8 @@ bgp_mp_unreach_parse (struct bgp_attr_parser_args *args, if (safi != SAFI_MPLS_LABELED_VPN) { - ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), withdraw_len); + ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), withdraw_len, + &num_mp_pfx); if (ret < 0) return BGP_ATTR_PARSE_ERROR_NOTIFYPLS; } diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index 13325190a..1a392b2b1 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -416,6 +416,7 @@ bgp_routeadv_timer (struct thread *thread) peer = THREAD_ARG (thread); peer->t_routeadv = NULL; + peer->radv_adjusted = 0; if (BGP_DEBUG (fsm, FSM)) zlog (peer->log, LOG_DEBUG, @@ -426,14 +427,9 @@ bgp_routeadv_timer (struct thread *thread) BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - /* - * If there is no UPDATE to send, don't start the timer. We will start - * it when the queues go non-empty. + /* MRAI timer is no longer restarted here, it would be done + * when the FIFO is built. */ - if (bgp_routeq_empty(peer)) - return 0; - - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, peer->v_routeadv); return 0; } @@ -628,6 +624,24 @@ bgp_adjust_routeadv (struct peer *peer) double diff; unsigned long remain; + /* Bypass checks for special case of MRAI being 0 */ + if (peer->v_routeadv == 0) + { + /* Stop existing timer, just in case it is running for a different + * duration and schedule write thread immediately. + */ + if (peer->t_routeadv) + BGP_TIMER_OFF(peer->t_routeadv); + + peer->synctime = bgp_clock (); + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } + + /* Mark that we've adjusted the timer */ + peer->radv_adjusted = 1; + + /* * CASE I: * If the last update was written more than MRAI back, expire the timer diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 524c75611..0b41c7d1a 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -157,6 +157,7 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi) int space_needed = 0; size_t mpattrlen_pos = 0; size_t mpattr_pos = 0; + int num_pfx_adv = 0; s = peer->work; stream_reset (s); @@ -248,6 +249,8 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi) adv->baa->attr); bgp_packet_mpattr_prefix(snlri, afi, safi, &rn->p, prd, tag); } + num_pfx_adv++; + if (BGP_DEBUG (update, UPDATE_OUT)) { char buf[INET6_BUFSIZ]; @@ -285,8 +288,12 @@ bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi) else packet = stream_dup (s); bgp_packet_set_size (packet); + if (BGP_DEBUG (update, UPDATE_OUT)) + zlog(peer->log, LOG_DEBUG, + "%s form UPDATE (adv) total len %d numPfx %d", + peer->host, + (stream_get_endp (s) - stream_get_getp (s)), num_pfx_adv); bgp_packet_add (peer, packet); - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); stream_reset (s); stream_reset (snlri); return packet; @@ -364,6 +371,7 @@ bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t safi) u_char first_time = 1; int space_remaining = 0; int space_needed = 0; + int num_pfx_wd = 0; s = peer->work; stream_reset (s); @@ -411,6 +419,7 @@ bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t safi) bgp_packet_mpunreach_prefix(s, &rn->p, afi, safi, prd, NULL); } + num_pfx_wd++; if (BGP_DEBUG (update, UPDATE_OUT)) { @@ -447,6 +456,11 @@ bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t safi) stream_putw_at (s, attrlen_pos, total_attr_len); } bgp_packet_set_size (s); + if (BGP_DEBUG (update, UPDATE_OUT)) + zlog(peer->log, LOG_DEBUG, + "%s form UPDATE (wd) total len %d numPfx %d", + peer->host, + (stream_get_endp (s) - stream_get_getp (s)), num_pfx_wd); packet = stream_dup (s); bgp_packet_add (peer, packet); stream_reset (s); @@ -684,28 +698,83 @@ bgp_write_packet (struct peer *peer) return NULL; } -/* Is there partially written packet or updates we can send right - now. */ -static int -bgp_write_proceed (struct peer *peer) +/* Are there prefixes queued for being withdrawn? */ +int +bgp_peer_wd_fifo_exists (struct peer *peer) { afi_t afi; safi_t safi; struct bgp_advertise *adv; - if (stream_fifo_head (peer->obuf)) - return 1; - for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) if (FIFO_HEAD (&peer->sync[afi][safi]->withdraw)) return 1; + return 0; +} + +/* Are there prefixes queued for being advertised? + * Are they recent? + */ +int +bgp_peer_adv_fifo_exists (struct peer *peer, int chk_recent) +{ + afi_t afi; + safi_t safi; + struct bgp_advertise *adv; + for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) if ((adv = FIFO_HEAD (&peer->sync[afi][safi]->update)) != NULL) - if (adv->binfo->uptime < peer->synctime) - return 1; + { + if (!chk_recent) + return 1; + if (adv->binfo->uptime < peer->synctime) + return 1; + } + + return 0; +} + +/* + * Schedule updates for the peer, if needed. + */ +void +bgp_peer_schedule_updates(struct peer *peer) +{ + /* If withdraw FIFO exists, immediately schedule write */ + if (bgp_peer_wd_fifo_exists(peer) && !peer->t_write) + { + if (BGP_DEBUG (events, EVENTS)) + zlog_debug("%s scheduling write thread", peer->host); + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + } + + /* If update FIFO exists, fire MRAI timer */ + if (bgp_peer_adv_fifo_exists(peer, 0) && !peer->radv_adjusted) + { + if (BGP_DEBUG (events, EVENTS)) + zlog_debug("%s scheduling MRAI timer", peer->host); + bgp_adjust_routeadv(peer); + } +} + +/* Is there partially written packet or updates we can send right + now. */ +static int +bgp_write_proceed (struct peer *peer) +{ + /* If queued packet exists, we should try to write it */ + if (stream_fifo_head (peer->obuf)) + return 1; + + /* If there are prefixes to be withdrawn or to be advertised (and + * queued before last MRAI timer expiry), schedule write + */ + if (bgp_peer_wd_fifo_exists(peer) + || bgp_peer_adv_fifo_exists(peer, 1)) + return 1; return 0; } @@ -1654,6 +1723,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) struct bgp_nlri withdraw; struct bgp_nlri mp_update; struct bgp_nlri mp_withdraw; + int num_pfx_adv, num_pfx_wd; /* Status must be Established. */ if (peer->status != Established) @@ -1672,6 +1742,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) memset (&mp_update, 0, sizeof (struct bgp_nlri)); memset (&mp_withdraw, 0, sizeof (struct bgp_nlri)); attr.extra = &extra; + num_pfx_adv = num_pfx_wd = 0; s = peer->ibuf; end = stream_pnt (s) + size; @@ -1707,7 +1778,8 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) /* Unfeasible Route packet format check. */ if (withdraw_len > 0) { - ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), withdraw_len); + ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), withdraw_len, + &num_pfx_wd); if (ret < 0) return -1; @@ -1798,7 +1870,8 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) if (update_len) { /* Check NLRI packet format and prefix length. */ - ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len); + ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len, + &num_pfx_adv); if (ret < 0) { bgp_attr_unintern_sub (&attr); @@ -1813,6 +1886,12 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) stream_forward_getp (s, update_len); } + if (BGP_DEBUG (update, UPDATE_IN)) + zlog(peer->log, LOG_DEBUG, + "%s rcvd UPDATE wlen %d wpfx %d attrlen %d alen %d apfx %d", + peer->host, withdraw_len, num_pfx_wd, attribute_len, + update_len, num_pfx_adv); + /* NLRI is processed only when the peer is configured specific Address Family and Subsequent Address Family. */ if (peer->afc[AFI_IP][SAFI_UNICAST]) diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h index fe3917f92..18b0024a7 100644 --- a/bgpd/bgp_packet.h +++ b/bgpd/bgp_packet.h @@ -26,6 +26,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define BGP_UNFEASIBLE_LEN 2U #define BGP_WRITE_PACKET_MAX 10U +/* Size of FIFOs upon which write thread is triggered. Note that write + * thread is also triggered upon BGP work-queue completion. + */ +#define BGP_ADV_FIFO_QUANTA 500 +#define BGP_WD_FIFO_QUANTA 200 + /* When to refresh */ #define REFRESH_IMMEDIATE 1 #define REFRESH_DEFER 2 @@ -57,4 +63,7 @@ extern int bgp_capability_receive (struct peer *, bgp_size_t); extern void bgp_update_restarted_peers (struct peer *); extern void bgp_update_implicit_eors (struct peer *); extern void bgp_check_update_delay (struct bgp *); +extern int bgp_peer_wd_fifo_exists (struct peer *); +extern int bgp_peer_adv_fifo_exists (struct peer *, int); +extern void bgp_peer_schedule_updates(struct peer *peer); #endif /* _QUAGGA_BGP_PACKET_H */ diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 88db54da0..e8cbe8c26 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -1420,6 +1420,7 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, struct bgp_info *nextri = NULL; int paths_eq, do_mpath; struct list mp_list; + char buf[INET6_BUFSIZ]; bgp_mp_list_init (&mp_list); do_mpath = (mpath_cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS || @@ -1789,6 +1790,27 @@ bgp_processq_del (struct work_queue *wq, void *data) XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } +static void +bgp_process_queue_complete (struct work_queue *wq) +{ + struct bgp *bgp; + struct peer *peer; + struct listnode *node, *nnode; + + /* Schedule write thread either directly or through the MRAI timer + * if needed. + */ + bgp = bgp_get_default (); + if (!bgp) + return; + + if (BGP_ROUTE_ADV_HOLD(bgp)) + return; + + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) + bgp_peer_schedule_updates(peer); +} + void bgp_process_queue_init (void) { @@ -1805,8 +1827,11 @@ bgp_process_queue_init (void) bm->process_main_queue->spec.workfunc = &bgp_process_main; bm->process_main_queue->spec.del_item_data = &bgp_processq_del; + bm->process_main_queue->spec.completion_func = &bgp_process_queue_complete; bm->process_main_queue->spec.max_retries = 0; bm->process_main_queue->spec.hold = 50; + /* Use a higher yield value of 50ms for main queue processing */ + bm->process_main_queue->spec.yield = 50 * 1000L; memcpy (bm->process_rsclient_queue, bm->process_main_queue, sizeof (struct work_queue *)); @@ -2820,6 +2845,12 @@ bgp_announce_route (struct peer *peer, afi_t afi, safi_t safi) if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) bgp_announce_table (peer, afi, safi, NULL, 1); + + /* + * The write thread needs to be scheduled since it may not be done as + * part of building adj_out. + */ + bgp_peer_schedule_updates(peer); } void @@ -3374,12 +3405,13 @@ bgp_nlri_parse (struct peer *peer, struct attr *attr, struct bgp_nlri *packet) /* NLRI encode syntax check routine. */ int bgp_nlri_sanity_check (struct peer *peer, int afi, u_char *pnt, - bgp_size_t length) + bgp_size_t length, int *numpfx) { u_char *end; u_char prefixlen; int psize; + *numpfx = 0; end = pnt + length; /* RFC1771 6.3 The NLRI field in the UPDATE message is checked for @@ -3417,6 +3449,7 @@ bgp_nlri_sanity_check (struct peer *peer, int afi, u_char *pnt, } pnt += psize; + (*numpfx)++; } /* Packet length consistency check. */ diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index 655bd6ca7..56d72c4e7 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -217,7 +217,7 @@ extern struct bgp_info_extra *bgp_info_extra_get (struct bgp_info *); extern void bgp_info_set_flag (struct bgp_node *, struct bgp_info *, u_int32_t); extern void bgp_info_unset_flag (struct bgp_node *, struct bgp_info *, u_int32_t); -extern int bgp_nlri_sanity_check (struct peer *, int, u_char *, bgp_size_t); +extern int bgp_nlri_sanity_check (struct peer *, int, u_char *, bgp_size_t, int *); extern int bgp_nlri_parse (struct peer *, struct attr *, struct bgp_nlri *); extern int bgp_maximum_prefix_overflow (struct peer *, afi_t, safi_t, int); diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index b205df262..bb60f08eb 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -2251,6 +2251,8 @@ bgp_create (as_t *as, const char *name) bgp->name = strdup (name); bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX; + bgp->adv_quanta = BGP_ADV_FIFO_QUANTA; + bgp->wd_quanta = BGP_WD_FIFO_QUANTA; THREAD_TIMER_ON (master, bgp->t_startup, bgp_startup_timer_expire, bgp, bgp->restart_time); diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 2e0419d30..2dbc64343 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -224,8 +224,13 @@ struct bgp } maxpaths[AFI_MAX][SAFI_MAX]; u_int32_t wpkt_quanta; /* per peer packet quanta to write */ + u_int32_t adv_quanta; /* adv FIFO size that triggers write */ + u_int32_t wd_quanta; /* withdraw FIFO size that triggers write */ }; +#define BGP_ROUTE_ADV_HOLD(bgp) \ + (bgp->main_peers_update_hold || bgp->rsclient_peers_update_hold) + /* BGP peer-group support. */ struct peer_group { @@ -551,6 +556,8 @@ struct peer struct thread *t_gr_restart; struct thread *t_gr_stale; + int radv_adjusted; /* flag if MRAI has been adjusted or not */ + /* workqueues */ struct work_queue *clear_node_queue;