#include "bgpd/bgp_route.h"
#include "bgpd/bgp_advertise.h"
#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_aspath.h"
#include "bgpd/bgp_packet.h"
#include "bgpd/bgp_fsm.h"
/* Add new advertisement to advertisement attribute list. */
bgp_advertise_add (adv->baa, adv);
- if (FIFO_EMPTY(&peer->sync[afi][safi]->update))
- bgp_adjust_routeadv(peer);
-
BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->update, &adv->fifo);
+
+ /*
+ * Schedule write thread (by triggering adjustment of MRAI timer) only if
+ * update FIFO has grown. Otherwise, it will be done upon the work queue
+ * being fully processed. Only adjust timer if needed.
+ */
+ if (!BGP_ROUTE_ADV_HOLD(peer->bgp) &&
+ (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->update) >=
+ peer->bgp->adv_quanta))
+ {
+ if (!peer->radv_adjusted)
+ {
+ if (BGP_DEBUG (events, EVENTS))
+ zlog_debug("%s scheduling MRAI timer after adj_out_set", peer->host);
+ bgp_adjust_routeadv(peer);
+ }
+ }
}
void
/* Add to synchronization entry for withdraw announcement. */
BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->withdraw, &adv->fifo);
- /* Schedule packet write. */
- BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+ /*
+ * Schedule write thread only if withdraw FIFO has grown. Otherwise,
+ * it will be done upon the work queue being fully processed.
+ */
+ if (!BGP_ROUTE_ADV_HOLD(peer->bgp) &&
+ (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->withdraw) >=
+ peer->bgp->wd_quanta))
+ {
+ if (!peer->t_write)
+ {
+ if (BGP_DEBUG (events, EVENTS))
+ zlog_debug("%s scheduling write thread after adj_out_unset",
+ peer->host);
+ BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+ }
+ }
}
else
{
(F)->count = 0; \
} while (0)
/* Number of elements currently queued on FIFO F.
 * The whole expansion is parenthesized so the macro composes safely
 * inside larger expressions (comparisons, casts, arithmetic).
 */
#define BGP_ADV_FIFO_COUNT(F) \
  ((F)->count)
+
/* Prototypes. */
extern void bgp_adj_out_set (struct bgp_node *, struct peer *, struct prefix *,
struct attr *, afi_t, safi_t, struct bgp_info *);
bgp_size_t nlri_len;
size_t start;
int ret;
+ int num_mp_pfx = 0;
struct stream *s;
struct peer *const peer = args->peer;
struct attr *const attr = args->attr;
if (safi != SAFI_MPLS_LABELED_VPN)
{
- ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), nlri_len);
+ ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), nlri_len,
+ &num_mp_pfx);
if (ret < 0)
{
zlog_info ("%s: (%s) NLRI doesn't pass sanity check",
safi_t safi;
u_int16_t withdraw_len;
int ret;
+ int num_mp_pfx = 0;
struct peer *const peer = args->peer;
const bgp_size_t length = args->length;
if (safi != SAFI_MPLS_LABELED_VPN)
{
- ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), withdraw_len);
+ ret = bgp_nlri_sanity_check (peer, afi, stream_pnt (s), withdraw_len,
+ &num_mp_pfx);
if (ret < 0)
return BGP_ATTR_PARSE_ERROR_NOTIFYPLS;
}
peer = THREAD_ARG (thread);
peer->t_routeadv = NULL;
+ peer->radv_adjusted = 0;
if (BGP_DEBUG (fsm, FSM))
zlog (peer->log, LOG_DEBUG,
BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
- /*
- * If there is no UPDATE to send, don't start the timer. We will start
- * it when the queues go non-empty.
+ /* MRAI timer is no longer restarted here; it will be done
+  * when the FIFO is built.
*/
- if (bgp_routeq_empty(peer))
- return 0;
-
- BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, peer->v_routeadv);
return 0;
}
double diff;
unsigned long remain;
+ /* Bypass checks for special case of MRAI being 0 */
+ if (peer->v_routeadv == 0)
+ {
+ /* Stop the existing timer, just in case it is running for a different
+  * duration, and schedule the write thread immediately.
+ */
+ if (peer->t_routeadv)
+ BGP_TIMER_OFF(peer->t_routeadv);
+
+ peer->synctime = bgp_clock ();
+ BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+ return;
+ }
+
+ /* Mark that we've adjusted the timer */
+ peer->radv_adjusted = 1;
+
+
/*
* CASE I:
* If the last update was written more than MRAI back, expire the timer
int space_needed = 0;
size_t mpattrlen_pos = 0;
size_t mpattr_pos = 0;
+ int num_pfx_adv = 0;
s = peer->work;
stream_reset (s);
adv->baa->attr);
bgp_packet_mpattr_prefix(snlri, afi, safi, &rn->p, prd, tag);
}
+ num_pfx_adv++;
+
if (BGP_DEBUG (update, UPDATE_OUT))
{
char buf[INET6_BUFSIZ];
else
packet = stream_dup (s);
bgp_packet_set_size (packet);
+ if (BGP_DEBUG (update, UPDATE_OUT))
+ zlog(peer->log, LOG_DEBUG,
+ "%s form UPDATE (adv) total len %d numPfx %d",
+ peer->host,
+ (stream_get_endp (s) - stream_get_getp (s)), num_pfx_adv);
bgp_packet_add (peer, packet);
- BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
stream_reset (s);
stream_reset (snlri);
return packet;
u_char first_time = 1;
int space_remaining = 0;
int space_needed = 0;
+ int num_pfx_wd = 0;
s = peer->work;
stream_reset (s);
bgp_packet_mpunreach_prefix(s, &rn->p, afi, safi, prd, NULL);
}
+ num_pfx_wd++;
if (BGP_DEBUG (update, UPDATE_OUT))
{
stream_putw_at (s, attrlen_pos, total_attr_len);
}
bgp_packet_set_size (s);
+ if (BGP_DEBUG (update, UPDATE_OUT))
+ zlog(peer->log, LOG_DEBUG,
+ "%s form UPDATE (wd) total len %d numPfx %d",
+ peer->host,
+ (stream_get_endp (s) - stream_get_getp (s)), num_pfx_wd);
packet = stream_dup (s);
bgp_packet_add (peer, packet);
stream_reset (s);
return NULL;
}
-/* Is there partially written packet or updates we can send right
- now. */
-static int
-bgp_write_proceed (struct peer *peer)
+/* Are there prefixes queued for being withdrawn? */
+int
+bgp_peer_wd_fifo_exists (struct peer *peer)
{
afi_t afi;
safi_t safi;
struct bgp_advertise *adv;
- if (stream_fifo_head (peer->obuf))
- return 1;
-
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
if (FIFO_HEAD (&peer->sync[afi][safi]->withdraw))
return 1;
+ return 0;
+}
+
+/* Are there prefixes queued for being advertised?
+ * Are they recent?
+ */
+int
+bgp_peer_adv_fifo_exists (struct peer *peer, int chk_recent)
+{
+ afi_t afi;
+ safi_t safi;
+ struct bgp_advertise *adv;
+
for (afi = AFI_IP; afi < AFI_MAX; afi++)
for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++)
if ((adv = FIFO_HEAD (&peer->sync[afi][safi]->update)) != NULL)
- if (adv->binfo->uptime < peer->synctime)
- return 1;
+ {
+ if (!chk_recent)
+ return 1;
+ if (adv->binfo->uptime < peer->synctime)
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Schedule updates for the peer, if needed.
+ */
+void
+bgp_peer_schedule_updates(struct peer *peer)
+{
+ /* If withdraw FIFO exists, immediately schedule write */
+ if (bgp_peer_wd_fifo_exists(peer) && !peer->t_write)
+ {
+ if (BGP_DEBUG (events, EVENTS))
+ zlog_debug("%s scheduling write thread", peer->host);
+ BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd);
+ }
+
+ /* If update FIFO exists, fire MRAI timer */
+ if (bgp_peer_adv_fifo_exists(peer, 0) && !peer->radv_adjusted)
+ {
+ if (BGP_DEBUG (events, EVENTS))
+ zlog_debug("%s scheduling MRAI timer", peer->host);
+ bgp_adjust_routeadv(peer);
+ }
+}
+
+/* Is there partially written packet or updates we can send right
+ now. */
+static int
+bgp_write_proceed (struct peer *peer)
+{
+ /* If queued packet exists, we should try to write it */
+ if (stream_fifo_head (peer->obuf))
+ return 1;
+
+ /* If there are prefixes to be withdrawn or to be advertised (and
+ * queued before last MRAI timer expiry), schedule write
+ */
+ if (bgp_peer_wd_fifo_exists(peer)
+ || bgp_peer_adv_fifo_exists(peer, 1))
+ return 1;
return 0;
}
struct bgp_nlri withdraw;
struct bgp_nlri mp_update;
struct bgp_nlri mp_withdraw;
+ int num_pfx_adv, num_pfx_wd;
/* Status must be Established. */
if (peer->status != Established)
memset (&mp_update, 0, sizeof (struct bgp_nlri));
memset (&mp_withdraw, 0, sizeof (struct bgp_nlri));
attr.extra = &extra;
+ num_pfx_adv = num_pfx_wd = 0;
s = peer->ibuf;
end = stream_pnt (s) + size;
/* Unfeasible Route packet format check. */
if (withdraw_len > 0)
{
- ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), withdraw_len);
+ ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), withdraw_len,
+ &num_pfx_wd);
if (ret < 0)
return -1;
if (update_len)
{
/* Check NLRI packet format and prefix length. */
- ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len);
+ ret = bgp_nlri_sanity_check (peer, AFI_IP, stream_pnt (s), update_len,
+ &num_pfx_adv);
if (ret < 0)
{
bgp_attr_unintern_sub (&attr);
stream_forward_getp (s, update_len);
}
+ if (BGP_DEBUG (update, UPDATE_IN))
+ zlog(peer->log, LOG_DEBUG,
+ "%s rcvd UPDATE wlen %d wpfx %d attrlen %d alen %d apfx %d",
+ peer->host, withdraw_len, num_pfx_wd, attribute_len,
+ update_len, num_pfx_adv);
+
/* NLRI is processed only when the peer is configured specific
Address Family and Subsequent Address Family. */
if (peer->afc[AFI_IP][SAFI_UNICAST])
#define BGP_UNFEASIBLE_LEN 2U
#define BGP_WRITE_PACKET_MAX 10U
+/* Size of FIFOs upon which write thread is triggered. Note that write
+ * thread is also triggered upon BGP work-queue completion.
+ */
+#define BGP_ADV_FIFO_QUANTA 500
+#define BGP_WD_FIFO_QUANTA 200
+
/* When to refresh */
#define REFRESH_IMMEDIATE 1
#define REFRESH_DEFER 2
extern void bgp_update_restarted_peers (struct peer *);
extern void bgp_update_implicit_eors (struct peer *);
extern void bgp_check_update_delay (struct bgp *);
+extern int bgp_peer_wd_fifo_exists (struct peer *);
+extern int bgp_peer_adv_fifo_exists (struct peer *, int);
+extern void bgp_peer_schedule_updates(struct peer *peer);
#endif /* _QUAGGA_BGP_PACKET_H */
struct bgp_info *nextri = NULL;
int paths_eq, do_mpath;
struct list mp_list;
+ char buf[INET6_BUFSIZ];
bgp_mp_list_init (&mp_list);
do_mpath = (mpath_cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS ||
XFREE (MTYPE_BGP_PROCESS_QUEUE, pq);
}
+static void
+bgp_process_queue_complete (struct work_queue *wq)
+{
+ struct bgp *bgp;
+ struct peer *peer;
+ struct listnode *node, *nnode;
+
+ /* Schedule write thread either directly or through the MRAI timer
+ * if needed.
+ */
+ bgp = bgp_get_default ();
+ if (!bgp)
+ return;
+
+ if (BGP_ROUTE_ADV_HOLD(bgp))
+ return;
+
+ for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer))
+ bgp_peer_schedule_updates(peer);
+}
+
void
bgp_process_queue_init (void)
{
bm->process_main_queue->spec.workfunc = &bgp_process_main;
bm->process_main_queue->spec.del_item_data = &bgp_processq_del;
+ bm->process_main_queue->spec.completion_func = &bgp_process_queue_complete;
bm->process_main_queue->spec.max_retries = 0;
bm->process_main_queue->spec.hold = 50;
+ /* Use a higher yield value of 50ms for main queue processing */
+ bm->process_main_queue->spec.yield = 50 * 1000L;
memcpy (bm->process_rsclient_queue, bm->process_main_queue,
sizeof (struct work_queue *));
if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT))
bgp_announce_table (peer, afi, safi, NULL, 1);
+
+ /*
+ * The write thread needs to be scheduled since it may not be done as
+ * part of building adj_out.
+ */
+ bgp_peer_schedule_updates(peer);
}
void
/* NLRI encode syntax check routine. */
int
bgp_nlri_sanity_check (struct peer *peer, int afi, u_char *pnt,
- bgp_size_t length)
+ bgp_size_t length, int *numpfx)
{
u_char *end;
u_char prefixlen;
int psize;
+ *numpfx = 0;
end = pnt + length;
/* RFC1771 6.3 The NLRI field in the UPDATE message is checked for
}
pnt += psize;
+ (*numpfx)++;
}
/* Packet length consistency check. */
extern void bgp_info_set_flag (struct bgp_node *, struct bgp_info *, u_int32_t);
extern void bgp_info_unset_flag (struct bgp_node *, struct bgp_info *, u_int32_t);
-extern int bgp_nlri_sanity_check (struct peer *, int, u_char *, bgp_size_t);
+extern int bgp_nlri_sanity_check (struct peer *, int, u_char *, bgp_size_t, int *);
extern int bgp_nlri_parse (struct peer *, struct attr *, struct bgp_nlri *);
extern int bgp_maximum_prefix_overflow (struct peer *, afi_t, safi_t, int);
bgp->name = strdup (name);
bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX;
+ bgp->adv_quanta = BGP_ADV_FIFO_QUANTA;
+ bgp->wd_quanta = BGP_WD_FIFO_QUANTA;
THREAD_TIMER_ON (master, bgp->t_startup, bgp_startup_timer_expire,
bgp, bgp->restart_time);
} maxpaths[AFI_MAX][SAFI_MAX];
u_int32_t wpkt_quanta; /* per peer packet quanta to write */
+ u_int32_t adv_quanta; /* adv FIFO size that triggers write */
+ u_int32_t wd_quanta; /* withdraw FIFO size that triggers write */
};
+#define BGP_ROUTE_ADV_HOLD(bgp) \
+ (bgp->main_peers_update_hold || bgp->rsclient_peers_update_hold)
+
/* BGP peer-group support. */
struct peer_group
{
struct thread *t_gr_restart;
struct thread *t_gr_stale;
+ int radv_adjusted; /* flag if MRAI has been adjusted or not */
+
/* workqueues */
struct work_queue *clear_node_queue;