2 * bgp_updgrp_adv.c: BGP update group advertisement and adjacency
6 * @copyright Copyright (C) 2014 Cumulus Networks, Inc.
8 * @author Avneesh Sachdev <avneesh@sproute.net>
9 * @author Rajesh Varadarajan <rajesh@sproute.net>
10 * @author Pradosh Mohapatra <pradosh@sproute.net>
12 * This file is part of GNU Zebra.
14 * GNU Zebra is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the
16 * Free Software Foundation; either version 2, or (at your option) any
19 * GNU Zebra is distributed in the hope that it will be useful, but
20 * WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with GNU Zebra; see the file COPYING. If not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
40 #include "bgpd/bgpd.h"
41 #include "bgpd/bgp_table.h"
42 #include "bgpd/bgp_debug.h"
43 #include "bgpd/bgp_route.h"
44 #include "bgpd/bgp_advertise.h"
45 #include "bgpd/bgp_attr.h"
46 #include "bgpd/bgp_aspath.h"
47 #include "bgpd/bgp_packet.h"
48 #include "bgpd/bgp_fsm.h"
49 #include "bgpd/bgp_mplsvpn.h"
50 #include "bgpd/bgp_updgrp.h"
51 #include "bgpd/bgp_advertise.h"
58 static inline struct bgp_adj_out
*
59 adj_lookup (struct bgp_node
*rn
, struct update_subgroup
*subgrp
)
61 struct bgp_adj_out
*adj
;
65 for (adj
= rn
->adj_out
; adj
; adj
= adj
->next
)
66 if (adj
->subgroup
== subgrp
)
72 adj_free (struct bgp_adj_out
*adj
)
74 TAILQ_REMOVE (&(adj
->subgroup
->adjq
), adj
, subgrp_adj_train
);
75 SUBGRP_DECR_STAT (adj
->subgroup
, adj_count
);
76 XFREE (MTYPE_BGP_ADJ_OUT
, adj
);
80 group_announce_route_walkcb (struct update_group
*updgrp
, void *arg
)
82 struct updwalk_context
*ctx
= arg
;
83 struct update_subgroup
*subgrp
;
85 UPDGRP_FOREACH_SUBGRP (updgrp
, subgrp
)
89 * Skip the subgroups that have coalesce timer running. We will
90 * walk the entire prefix table for those subgroups when the
91 * coalesce timer fires.
93 if (!subgrp
->t_coalesce
)
94 subgroup_process_announce_selected (subgrp
, ctx
->ri
, ctx
->rn
);
97 return UPDWALK_CONTINUE
;
101 subgrp_show_adjq_vty (struct update_subgroup
*subgrp
, struct vty
*vty
,
104 struct bgp_table
*table
;
105 struct bgp_adj_out
*adj
;
106 unsigned long output_count
;
112 bgp
= SUBGRP_INST (subgrp
);
116 table
= bgp
->rib
[SUBGRP_AFI (subgrp
)][SUBGRP_SAFI (subgrp
)];
120 for (rn
= bgp_table_top (table
); rn
; rn
= bgp_route_next (rn
))
121 for (adj
= rn
->adj_out
; adj
; adj
= adj
->next
)
122 if (adj
->subgroup
== subgrp
)
127 "BGP table version is %" PRIu64
", local router ID is %s%s",
128 table
->version
, inet_ntoa (bgp
->router_id
),
130 vty_out (vty
, BGP_SHOW_SCODE_HEADER
, VTY_NEWLINE
, VTY_NEWLINE
);
131 vty_out (vty
, BGP_SHOW_OCODE_HEADER
, VTY_NEWLINE
, VTY_NEWLINE
);
136 vty_out (vty
, BGP_SHOW_HEADER
, VTY_NEWLINE
);
139 if ((flags
& UPDWALK_FLAGS_ADVQUEUE
) && adj
->adv
&& adj
->adv
->baa
)
141 route_vty_out_tmp (vty
, &rn
->p
, adj
->adv
->baa
->attr
, SUBGRP_SAFI (subgrp
), 0, NULL
);
144 if ((flags
& UPDWALK_FLAGS_ADVERTISED
) && adj
->attr
)
146 route_vty_out_tmp (vty
, &rn
->p
, adj
->attr
, SUBGRP_SAFI (subgrp
), 0, NULL
);
150 if (output_count
!= 0)
151 vty_out (vty
, "%sTotal number of prefixes %ld%s",
152 VTY_NEWLINE
, output_count
, VTY_NEWLINE
);
156 updgrp_show_adj_walkcb (struct update_group
*updgrp
, void *arg
)
158 struct updwalk_context
*ctx
= arg
;
159 struct update_subgroup
*subgrp
;
163 UPDGRP_FOREACH_SUBGRP (updgrp
, subgrp
)
165 if (ctx
->subgrp_id
&& (ctx
->subgrp_id
!= subgrp
->id
))
167 vty_out (vty
, "update group %" PRIu64
", subgroup %" PRIu64
"%s", updgrp
->id
,
168 subgrp
->id
, VTY_NEWLINE
);
169 subgrp_show_adjq_vty (subgrp
, vty
, ctx
->flags
);
171 return UPDWALK_CONTINUE
;
175 updgrp_show_adj (struct bgp
*bgp
, afi_t afi
, safi_t safi
,
176 struct vty
*vty
, u_int64_t id
, u_int8_t flags
)
178 struct updwalk_context ctx
;
179 memset (&ctx
, 0, sizeof (ctx
));
184 update_group_af_walk (bgp
, afi
, safi
, updgrp_show_adj_walkcb
, &ctx
);
188 subgroup_coalesce_timer (struct thread
*thread
)
190 struct update_subgroup
*subgrp
;
192 subgrp
= THREAD_ARG (thread
);
193 if (bgp_debug_update(NULL
, NULL
, subgrp
->update_group
, 0))
194 zlog_debug ("u%" PRIu64
":s%" PRIu64
" announcing routes upon coalesce timer expiry",
195 (SUBGRP_UPDGRP (subgrp
))->id
, subgrp
->id
);
196 subgrp
->t_coalesce
= NULL
;
197 subgrp
->v_coalesce
= 0;
198 subgroup_announce_route (subgrp
);
201 /* While the announce_route() may kick off the route advertisement timer for
202 * the members of the subgroup, we'd like to send the initial updates much
203 * faster (i.e., without enforcing MRAI). Also, if there were no routes to
204 * announce, this is the method currently employed to trigger the EOR.
206 if (!bgp_update_delay_active(SUBGRP_INST(subgrp
)))
211 SUBGRP_FOREACH_PEER (subgrp
, paf
)
213 peer
= PAF_PEER(paf
);
214 BGP_TIMER_OFF(peer
->t_routeadv
);
215 BGP_TIMER_ON (peer
->t_routeadv
, bgp_routeadv_timer
, 0);
223 update_group_announce_walkcb (struct update_group
*updgrp
, void *arg
)
225 struct update_subgroup
*subgrp
;
227 UPDGRP_FOREACH_SUBGRP (updgrp
, subgrp
)
229 subgroup_announce_all (subgrp
);
232 return UPDWALK_CONTINUE
;
236 update_group_announce_rrc_walkcb (struct update_group
*updgrp
, void *arg
)
238 struct update_subgroup
*subgrp
;
243 afi
= UPDGRP_AFI (updgrp
);
244 safi
= UPDGRP_SAFI (updgrp
);
245 peer
= UPDGRP_PEER (updgrp
);
247 /* Only announce if this is a group of route-reflector-clients */
248 if (CHECK_FLAG(peer
->af_flags
[afi
][safi
], PEER_FLAG_REFLECTOR_CLIENT
))
250 UPDGRP_FOREACH_SUBGRP (updgrp
, subgrp
)
252 subgroup_announce_all (subgrp
);
256 return UPDWALK_CONTINUE
;
259 /********************
261 ********************/
264 * Allocate an adj-out object. Do proper initialization of its fields,
265 * primarily its association with the subgroup and the prefix.
268 bgp_adj_out_alloc (struct update_subgroup
*subgrp
, struct bgp_node
*rn
)
270 struct bgp_adj_out
*adj
;
272 adj
= XCALLOC (MTYPE_BGP_ADJ_OUT
, sizeof (struct bgp_adj_out
));
273 adj
->subgroup
= subgrp
;
276 BGP_ADJ_OUT_ADD (rn
, adj
);
280 TAILQ_INSERT_TAIL (&(subgrp
->adjq
), adj
, subgrp_adj_train
);
281 SUBGRP_INCR_STAT (subgrp
, adj_count
);
286 struct bgp_advertise
*
287 bgp_advertise_clean_subgroup (struct update_subgroup
*subgrp
,
288 struct bgp_adj_out
*adj
)
290 struct bgp_advertise
*adv
;
291 struct bgp_advertise_attr
*baa
;
292 struct bgp_advertise
*next
;
293 struct bgp_advertise_fifo
*fhead
;
301 fhead
= &subgrp
->sync
->update
;
303 /* Unlink myself from advertise attribute FIFO. */
304 bgp_advertise_delete (baa
, adv
);
306 /* Fetch next advertise candidate. */
309 /* Unintern BGP advertise attribute. */
310 bgp_advertise_unintern (subgrp
->hash
, baa
);
313 fhead
= &subgrp
->sync
->withdraw
;
316 /* Unlink myself from advertisement FIFO. */
317 BGP_ADV_FIFO_DEL (fhead
, adv
);
320 bgp_advertise_free (adj
->adv
);
327 bgp_adj_out_set_subgroup (struct bgp_node
*rn
,
328 struct update_subgroup
*subgrp
,
329 struct attr
*attr
, struct bgp_info
*binfo
)
331 struct bgp_adj_out
*adj
= NULL
;
332 struct bgp_advertise
*adv
;
334 if (DISABLE_BGP_ANNOUNCE
)
337 /* Look for adjacency information. */
338 adj
= adj_lookup (rn
, subgrp
);
342 adj
= bgp_adj_out_alloc (subgrp
, rn
);
348 bgp_advertise_clean_subgroup (subgrp
, adj
);
349 adj
->adv
= bgp_advertise_new ();
353 assert (adv
->binfo
== NULL
);
354 adv
->binfo
= bgp_info_lock (binfo
); /* bgp_info adj_out reference */
357 adv
->baa
= bgp_advertise_intern (subgrp
->hash
, attr
);
359 adv
->baa
= baa_new ();
362 /* Add new advertisement to advertisement attribute list. */
363 bgp_advertise_add (adv
->baa
, adv
);
366 * If the update adv list is empty, trigger the member peers'
367 * mrai timers so the socket writes can happen.
369 if (BGP_ADV_FIFO_EMPTY (&subgrp
->sync
->update
))
373 SUBGRP_FOREACH_PEER (subgrp
, paf
)
375 bgp_adjust_routeadv (PAF_PEER (paf
));
379 BGP_ADV_FIFO_ADD (&subgrp
->sync
->update
, &adv
->fifo
);
381 subgrp
->version
= max (subgrp
->version
, rn
->version
);
384 /* The only time 'withdraw' will be false is if we are sending
385 * the "neighbor x.x.x.x default-originate" default and need to clear
386 * bgp_adj_out for the 0.0.0.0/0 route in the BGP table.
389 bgp_adj_out_unset_subgroup (struct bgp_node
*rn
,
390 struct update_subgroup
*subgrp
,
393 struct bgp_adj_out
*adj
;
394 struct bgp_advertise
*adv
;
397 if (DISABLE_BGP_ANNOUNCE
)
400 /* Lookup existing adjacency, if it is not there return immediately. */
401 adj
= adj_lookup (rn
, subgrp
);
406 /* Clean up previous advertisement. */
408 bgp_advertise_clean_subgroup (subgrp
, adj
);
410 if (adj
->attr
&& withdraw
)
412 /* We need advertisement structure. */
413 adj
->adv
= bgp_advertise_new ();
418 /* Note if we need to trigger a packet write */
419 if (BGP_ADV_FIFO_EMPTY (&subgrp
->sync
->withdraw
))
424 /* Add to synchronization entry for withdraw announcement. */
425 BGP_ADV_FIFO_ADD (&subgrp
->sync
->withdraw
, &adv
->fifo
);
427 /* Schedule packet write, if FIFO is getting its first entry. */
429 subgroup_trigger_write(subgrp
);
433 /* Remove myself from adjacency. */
434 BGP_ADJ_OUT_DEL (rn
, adj
);
436 /* Free allocated information. */
439 bgp_unlock_node (rn
);
447 subgrp
->version
= max (subgrp
->version
, rn
->version
);
451 bgp_adj_out_remove_subgroup (struct bgp_node
*rn
, struct bgp_adj_out
*adj
,
452 struct update_subgroup
*subgrp
)
455 bgp_attr_unintern (&adj
->attr
);
458 bgp_advertise_clean_subgroup (subgrp
, adj
);
460 BGP_ADJ_OUT_DEL (rn
, adj
);
465 * Go through all the routes and clean up the adj/adv structures corresponding
469 subgroup_clear_table (struct update_subgroup
*subgrp
)
471 struct bgp_adj_out
*aout
, *taout
;
473 SUBGRP_FOREACH_ADJ_SAFE (subgrp
, aout
, taout
)
475 bgp_unlock_node (aout
->rn
);
476 bgp_adj_out_remove_subgroup (aout
->rn
, aout
, subgrp
);
481 * subgroup_announce_table
484 subgroup_announce_table (struct update_subgroup
*subgrp
,
485 struct bgp_table
*table
, int rsclient
)
490 struct attr_extra extra
;
492 struct peer
*onlypeer
;
496 peer
= SUBGRP_PEER (subgrp
);
497 afi
= SUBGRP_AFI (subgrp
);
498 safi
= SUBGRP_SAFI (subgrp
);
500 onlypeer
= ((SUBGRP_PCOUNT (subgrp
) == 1) ?
501 (SUBGRP_PFIRST (subgrp
))->peer
: NULL
);
506 table
= (rsclient
) ? onlypeer
->rib
[afi
][safi
] : peer
->bgp
->rib
[afi
][safi
];
508 if (safi
!= SAFI_MPLS_VPN
509 && CHECK_FLAG (peer
->af_flags
[afi
][safi
], PEER_FLAG_DEFAULT_ORIGINATE
))
510 subgroup_default_originate (subgrp
, 0);
512 /* It's initialized in bgp_announce_[check|check_rsclient]() */
515 for (rn
= bgp_table_top (table
); rn
; rn
= bgp_route_next (rn
))
516 for (ri
= rn
->info
; ri
; ri
= ri
->next
)
518 if (CHECK_FLAG (ri
->flags
, BGP_INFO_SELECTED
))
521 && subgroup_announce_check (ri
, subgrp
, &rn
->p
, &attr
))
522 bgp_adj_out_set_subgroup (rn
, subgrp
, &attr
, ri
);
524 bgp_adj_out_unset_subgroup (rn
, subgrp
, 1);
528 * We walked through the whole table -- make sure our version number
529 * is consistent with the one on the table. This should allow
530 * subgroups to merge sooner if a peer comes up when the route node
531 * with the largest version is no longer in the table. This also
532 * covers the pathological case where all routes in the table have
535 subgrp
->version
= max (subgrp
->version
, table
->version
);
538 * Start a task to merge the subgroup if necessary.
540 update_subgroup_trigger_merge_check (subgrp
, 0);
544 * subgroup_announce_route
546 * Refresh all routes out to a subgroup.
549 subgroup_announce_route (struct update_subgroup
*subgrp
)
552 struct bgp_table
*table
;
553 struct peer
*onlypeer
;
556 if (update_subgroup_needs_refresh (subgrp
))
558 update_subgroup_set_needs_refresh (subgrp
, 0);
562 * First update is deferred until ORF or ROUTE-REFRESH is received
564 onlypeer
= ((SUBGRP_PCOUNT (subgrp
) == 1) ?
565 (SUBGRP_PFIRST (subgrp
))->peer
: NULL
);
567 CHECK_FLAG (onlypeer
->
568 af_sflags
[SUBGRP_AFI (subgrp
)][SUBGRP_SAFI (subgrp
)],
569 PEER_STATUS_ORF_WAIT_REFRESH
))
572 if (SUBGRP_SAFI (subgrp
) != SAFI_MPLS_VPN
)
573 subgroup_announce_table (subgrp
, NULL
, 0);
575 for (rn
= bgp_table_top (update_subgroup_rib (subgrp
)); rn
;
576 rn
= bgp_route_next (rn
))
577 if ((table
= (rn
->info
)) != NULL
)
578 subgroup_announce_table (subgrp
, table
, 0);
580 peer
= SUBGRP_PEER(subgrp
);
581 if (CHECK_FLAG(peer
->af_flags
[SUBGRP_AFI(subgrp
)][SUBGRP_SAFI(subgrp
)],
582 PEER_FLAG_RSERVER_CLIENT
))
583 subgroup_announce_table (subgrp
, NULL
, 1);
587 subgroup_default_originate (struct update_subgroup
*subgrp
, int withdraw
)
591 struct aspath
*aspath
;
597 int ret
= RMAP_DENYMATCH
;
604 peer
= SUBGRP_PEER (subgrp
);
605 afi
= SUBGRP_AFI (subgrp
);
606 safi
= SUBGRP_SAFI (subgrp
);
608 if (!(afi
== AFI_IP
|| afi
== AFI_IP6
))
612 from
= bgp
->peer_self
;
614 bgp_attr_default_set (&attr
, BGP_ORIGIN_IGP
);
615 aspath
= attr
.aspath
;
616 attr
.local_pref
= bgp
->default_local_pref
;
617 memcpy (&attr
.nexthop
, &peer
->nexthop
.v4
, IPV4_MAX_BYTELEN
);
620 str2prefix ("0.0.0.0/0", &p
);
622 else if (afi
== AFI_IP6
)
624 struct attr_extra
*ae
= attr
.extra
;
626 str2prefix ("::/0", &p
);
628 /* IPv6 global nexthop must be included. */
629 memcpy (&ae
->mp_nexthop_global
, &peer
->nexthop
.v6_global
,
631 ae
->mp_nexthop_len
= BGP_ATTR_NHLEN_IPV6_GLOBAL
;
633 /* If the peer is on shared nextwork and we have link-local
635 if (peer
->shared_network
636 && !IN6_IS_ADDR_UNSPECIFIED (&peer
->nexthop
.v6_local
))
638 memcpy (&ae
->mp_nexthop_local
, &peer
->nexthop
.v6_local
,
640 ae
->mp_nexthop_len
= BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
;
643 #endif /* HAVE_IPV6 */
645 if (peer
->default_rmap
[afi
][safi
].name
)
647 SET_FLAG (bgp
->peer_self
->rmap_type
, PEER_RMAP_TYPE_DEFAULT
);
648 for (rn
= bgp_table_top (bgp
->rib
[afi
][safi
]); rn
;
649 rn
= bgp_route_next (rn
))
651 for (ri
= rn
->info
; ri
; ri
= ri
->next
)
653 struct attr dummy_attr
;
654 struct attr_extra dummy_extra
;
655 struct bgp_info info
;
657 /* Provide dummy so the route-map can't modify the attributes */
658 dummy_attr
.extra
= &dummy_extra
;
659 bgp_attr_dup (&dummy_attr
, ri
->attr
);
660 info
.peer
= ri
->peer
;
661 info
.attr
= &dummy_attr
;
664 route_map_apply (peer
->default_rmap
[afi
][safi
].map
, &rn
->p
,
667 /* The route map might have set attributes. If we don't flush them
668 * here, they will be leaked. */
669 bgp_attr_flush (&dummy_attr
);
670 if (ret
!= RMAP_DENYMATCH
)
673 if (ret
!= RMAP_DENYMATCH
)
676 bgp
->peer_self
->rmap_type
= 0;
678 if (ret
== RMAP_DENYMATCH
)
684 if (CHECK_FLAG (subgrp
->sflags
, SUBGRP_STATUS_DEFAULT_ORIGINATE
))
685 subgroup_default_withdraw_packet (subgrp
);
686 UNSET_FLAG (subgrp
->sflags
, SUBGRP_STATUS_DEFAULT_ORIGINATE
);
690 if (!CHECK_FLAG (subgrp
->sflags
, SUBGRP_STATUS_DEFAULT_ORIGINATE
))
692 SET_FLAG (subgrp
->sflags
, SUBGRP_STATUS_DEFAULT_ORIGINATE
);
693 subgroup_default_update_packet (subgrp
, &attr
, from
);
695 /* The 'neighbor x.x.x.x default-originate' default will act as an
696 * implicit withdraw for any previous UPDATEs sent for 0.0.0.0/0 so
697 * clear adj_out for the 0.0.0.0/0 prefix in the BGP table.
700 str2prefix ("0.0.0.0/0", &p
);
703 str2prefix ("::/0", &p
);
704 #endif /* HAVE_IPV6 */
706 rn
= bgp_afi_node_get (bgp
->rib
[afi
][safi
], afi
, safi
, &p
, NULL
);
707 bgp_adj_out_unset_subgroup (rn
, subgrp
, 0);
711 bgp_attr_extra_free (&attr
);
712 aspath_unintern (&aspath
);
716 * Announce the BGP table to a subgroup.
718 * At startup, we try to optimize route announcement by coalescing the
719 * peer-up events. This is done only the first time - from then on,
720 * subgrp->v_coalesce will be set to zero and the normal logic
724 subgroup_announce_all (struct update_subgroup
*subgrp
)
730 * If coalesce timer value is not set, announce routes immediately.
732 if (!subgrp
->v_coalesce
)
734 if (bgp_debug_update(NULL
, NULL
, subgrp
->update_group
, 0))
735 zlog_debug ("u%" PRIu64
":s%" PRIu64
" announcing all routes",
736 subgrp
->update_group
->id
, subgrp
->id
);
737 subgroup_announce_route (subgrp
);
742 * We should wait for the coalesce timer. Arm the timer if not done.
744 if (!subgrp
->t_coalesce
)
746 THREAD_TIMER_MSEC_ON (master
, subgrp
->t_coalesce
, subgroup_coalesce_timer
,
747 subgrp
, subgrp
->v_coalesce
);
752 * Go through all update subgroups and set up the adv queue for the
756 group_announce_route (struct bgp
*bgp
, afi_t afi
, safi_t safi
,
757 struct bgp_node
*rn
, struct bgp_info
*ri
)
759 struct updwalk_context ctx
;
762 update_group_af_walk (bgp
, afi
, safi
, group_announce_route_walkcb
, &ctx
);
766 update_group_show_adj_queue (struct bgp
*bgp
, afi_t afi
, safi_t safi
,
767 struct vty
*vty
, u_int64_t id
)
769 updgrp_show_adj (bgp
, afi
, safi
, vty
, id
, UPDWALK_FLAGS_ADVQUEUE
);
773 update_group_show_advertised (struct bgp
*bgp
, afi_t afi
, safi_t safi
,
774 struct vty
*vty
, u_int64_t id
)
776 updgrp_show_adj (bgp
, afi
, safi
, vty
, id
, UPDWALK_FLAGS_ADVERTISED
);
780 update_group_announce (struct bgp
*bgp
)
782 update_group_walk (bgp
, update_group_announce_walkcb
, NULL
);
786 update_group_announce_rrclients (struct bgp
*bgp
)
788 update_group_walk (bgp
, update_group_announce_rrc_walkcb
, NULL
);