3 * Copyright (C) 2010 Google Inc.
5 * This file is part of Quagga
7 * Quagga is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
12 * Quagga is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; see the file COPYING; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "sockunion.h"
32 #include "bgpd/bgpd.h"
33 #include "bgpd/bgp_table.h"
34 #include "bgpd/bgp_route.h"
35 #include "bgpd/bgp_attr.h"
36 #include "bgpd/bgp_debug.h"
37 #include "bgpd/bgp_aspath.h"
38 #include "bgpd/bgp_community.h"
39 #include "bgpd/bgp_ecommunity.h"
40 #include "bgpd/bgp_lcommunity.h"
41 #include "bgpd/bgp_mpath.h"
44 * bgp_maximum_paths_set
46 * Record maximum-paths configuration for BGP instance
48 int bgp_maximum_paths_set(struct bgp
*bgp
, afi_t afi
, safi_t safi
, int peertype
,
49 uint16_t maxpaths
, uint16_t options
)
51 if (!bgp
|| (afi
>= AFI_MAX
) || (safi
>= SAFI_MAX
))
56 bgp
->maxpaths
[afi
][safi
].maxpaths_ibgp
= maxpaths
;
57 bgp
->maxpaths
[afi
][safi
].ibgp_flags
|= options
;
60 bgp
->maxpaths
[afi
][safi
].maxpaths_ebgp
= maxpaths
;
70 * bgp_maximum_paths_unset
72 * Remove maximum-paths configuration from BGP instance
74 int bgp_maximum_paths_unset(struct bgp
*bgp
, afi_t afi
, safi_t safi
,
77 if (!bgp
|| (afi
>= AFI_MAX
) || (safi
>= SAFI_MAX
))
82 bgp
->maxpaths
[afi
][safi
].maxpaths_ibgp
= multipath_num
;
83 bgp
->maxpaths
[afi
][safi
].ibgp_flags
= 0;
86 bgp
->maxpaths
[afi
][safi
].maxpaths_ebgp
= multipath_num
;
98 * Return true if ifindex for ifp1 and ifp2 are the same, else return false.
100 static int bgp_interface_same(struct interface
*ifp1
, struct interface
*ifp2
)
111 return (ifp1
->ifindex
== ifp2
->ifindex
);
116 * bgp_path_info_nexthop_cmp
118 * Compare the nexthops of two paths. Return value is less than, equal to,
119 * or greater than zero if bpi1 is respectively less than, equal to,
120 * or greater than bpi2.
122 int bgp_path_info_nexthop_cmp(struct bgp_path_info
*bpi1
,
123 struct bgp_path_info
*bpi2
)
126 struct in6_addr addr1
, addr2
;
128 compare
= IPV4_ADDR_CMP(&bpi1
->attr
->nexthop
, &bpi2
->attr
->nexthop
);
130 if (bpi1
->attr
->mp_nexthop_len
== bpi2
->attr
->mp_nexthop_len
) {
131 switch (bpi1
->attr
->mp_nexthop_len
) {
132 case BGP_ATTR_NHLEN_IPV4
:
133 case BGP_ATTR_NHLEN_VPNV4
:
134 compare
= IPV4_ADDR_CMP(
135 &bpi1
->attr
->mp_nexthop_global_in
,
136 &bpi2
->attr
->mp_nexthop_global_in
);
138 case BGP_ATTR_NHLEN_IPV6_GLOBAL
:
139 case BGP_ATTR_NHLEN_VPNV6_GLOBAL
:
140 compare
= IPV6_ADDR_CMP(
141 &bpi1
->attr
->mp_nexthop_global
,
142 &bpi2
->attr
->mp_nexthop_global
);
144 case BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
:
145 addr1
= (bpi1
->attr
->mp_nexthop_prefer_global
)
146 ? bpi1
->attr
->mp_nexthop_global
147 : bpi1
->attr
->mp_nexthop_local
;
148 addr2
= (bpi2
->attr
->mp_nexthop_prefer_global
)
149 ? bpi2
->attr
->mp_nexthop_global
150 : bpi2
->attr
->mp_nexthop_local
;
152 if (!bpi1
->attr
->mp_nexthop_prefer_global
153 && !bpi2
->attr
->mp_nexthop_prefer_global
)
154 compare
= !bgp_interface_same(
159 compare
= IPV6_ADDR_CMP(&addr1
, &addr2
);
164 /* This can happen if one IPv6 peer sends you global and
166 * nexthops but another IPv6 peer only sends you global
168 else if (bpi1
->attr
->mp_nexthop_len
169 == BGP_ATTR_NHLEN_IPV6_GLOBAL
170 || bpi1
->attr
->mp_nexthop_len
171 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
) {
172 compare
= IPV6_ADDR_CMP(&bpi1
->attr
->mp_nexthop_global
,
173 &bpi2
->attr
->mp_nexthop_global
);
175 if (bpi1
->attr
->mp_nexthop_len
176 < bpi2
->attr
->mp_nexthop_len
)
188 * bgp_path_info_mpath_cmp
190 * This function determines our multipath list ordering. By ordering
191 * the list we can deterministically select which paths are included
192 * in the multipath set. The ordering also helps in detecting changes
193 * in the multipath selection so we can detect whether to send an
196 * The order of paths is determined first by received nexthop, and then
197 * by peer address if the nexthops are the same.
199 static int bgp_path_info_mpath_cmp(void *val1
, void *val2
)
201 struct bgp_path_info
*bpi1
, *bpi2
;
207 compare
= bgp_path_info_nexthop_cmp(bpi1
, bpi2
);
210 if (!bpi1
->peer
->su_remote
&& !bpi2
->peer
->su_remote
)
212 else if (!bpi1
->peer
->su_remote
)
214 else if (!bpi2
->peer
->su_remote
)
217 compare
= sockunion_cmp(bpi1
->peer
->su_remote
,
218 bpi2
->peer
->su_remote
);
227 * Initialize the mp_list, which holds the list of multipaths
228 * selected by bgp_best_selection
230 void bgp_mp_list_init(struct list
*mp_list
)
233 memset(mp_list
, 0, sizeof(struct list
));
234 mp_list
->cmp
= bgp_path_info_mpath_cmp
;
240 * Clears all entries out of the mp_list
242 void bgp_mp_list_clear(struct list
*mp_list
)
245 list_delete_all_node(mp_list
);
251 * Adds a multipath entry to the mp_list
253 void bgp_mp_list_add(struct list
*mp_list
, struct bgp_path_info
*mpinfo
)
255 assert(mp_list
&& mpinfo
);
256 listnode_add_sort(mp_list
, mpinfo
);
260 * bgp_path_info_mpath_new
262 * Allocate and zero memory for a new bgp_path_info_mpath element
264 static struct bgp_path_info_mpath
*bgp_path_info_mpath_new(void)
266 struct bgp_path_info_mpath
*new_mpath
;
267 new_mpath
= XCALLOC(MTYPE_BGP_MPATH_INFO
,
268 sizeof(struct bgp_path_info_mpath
));
273 * bgp_path_info_mpath_free
275 * Release resources for a bgp_path_info_mpath element and zero out pointer
277 void bgp_path_info_mpath_free(struct bgp_path_info_mpath
**mpath
)
279 if (mpath
&& *mpath
) {
280 if ((*mpath
)->mp_attr
)
281 bgp_attr_unintern(&(*mpath
)->mp_attr
);
282 XFREE(MTYPE_BGP_MPATH_INFO
, *mpath
);
288 * bgp_path_info_mpath_get
290 * Fetch the mpath element for the given bgp_path_info. Used for
291 * doing lazy allocation.
293 static struct bgp_path_info_mpath
*
294 bgp_path_info_mpath_get(struct bgp_path_info
*path
)
296 struct bgp_path_info_mpath
*mpath
;
302 mpath
= bgp_path_info_mpath_new();
306 mpath
->mp_info
= path
;
312 * bgp_path_info_mpath_enqueue
314 * Enqueue a path onto the multipath list given the previous multipath
317 static void bgp_path_info_mpath_enqueue(struct bgp_path_info
*prev_info
,
318 struct bgp_path_info
*path
)
320 struct bgp_path_info_mpath
*prev
, *mpath
;
322 prev
= bgp_path_info_mpath_get(prev_info
);
323 mpath
= bgp_path_info_mpath_get(path
);
327 mpath
->mp_next
= prev
->mp_next
;
328 mpath
->mp_prev
= prev
;
330 prev
->mp_next
->mp_prev
= mpath
;
331 prev
->mp_next
= mpath
;
333 SET_FLAG(path
->flags
, BGP_PATH_MULTIPATH
);
337 * bgp_path_info_mpath_dequeue
339 * Remove a path from the multipath list
341 void bgp_path_info_mpath_dequeue(struct bgp_path_info
*path
)
343 struct bgp_path_info_mpath
*mpath
= path
->mpath
;
347 mpath
->mp_prev
->mp_next
= mpath
->mp_next
;
349 mpath
->mp_next
->mp_prev
= mpath
->mp_prev
;
350 mpath
->mp_next
= mpath
->mp_prev
= NULL
;
351 UNSET_FLAG(path
->flags
, BGP_PATH_MULTIPATH
);
355 * bgp_path_info_mpath_next
357 * Given a bgp_path_info, return the next multipath entry
359 struct bgp_path_info
*bgp_path_info_mpath_next(struct bgp_path_info
*path
)
361 if (!path
->mpath
|| !path
->mpath
->mp_next
)
363 return path
->mpath
->mp_next
->mp_info
;
367 * bgp_path_info_mpath_first
369 * Given bestpath bgp_path_info, return the first multipath entry.
371 struct bgp_path_info
*bgp_path_info_mpath_first(struct bgp_path_info
*path
)
373 return bgp_path_info_mpath_next(path
);
377 * bgp_path_info_mpath_count
379 * Given the bestpath bgp_path_info, return the number of multipath entries
381 uint32_t bgp_path_info_mpath_count(struct bgp_path_info
*path
)
385 return path
->mpath
->mp_count
;
389 * bgp_path_info_mpath_count_set
391 * Sets the count of multipaths into bestpath's mpath element
393 static void bgp_path_info_mpath_count_set(struct bgp_path_info
*path
,
396 struct bgp_path_info_mpath
*mpath
;
397 if (!count
&& !path
->mpath
)
399 mpath
= bgp_path_info_mpath_get(path
);
402 mpath
->mp_count
= count
;
406 * bgp_path_info_mpath_attr
408 * Given bestpath bgp_path_info, return aggregated attribute set used
409 * for advertising the multipath route
411 struct attr
*bgp_path_info_mpath_attr(struct bgp_path_info
*path
)
415 return path
->mpath
->mp_attr
;
419 * bgp_path_info_mpath_attr_set
421 * Sets the aggregated attribute into bestpath's mpath element
423 static void bgp_path_info_mpath_attr_set(struct bgp_path_info
*path
,
426 struct bgp_path_info_mpath
*mpath
;
427 if (!attr
&& !path
->mpath
)
429 mpath
= bgp_path_info_mpath_get(path
);
432 mpath
->mp_attr
= attr
;
436 * bgp_path_info_mpath_update
438 * Compare and sync up the multipath list with the mp_list generated by
441 void bgp_path_info_mpath_update(struct bgp_node
*rn
,
442 struct bgp_path_info
*new_best
,
443 struct bgp_path_info
*old_best
,
444 struct list
*mp_list
,
445 struct bgp_maxpaths_cfg
*mpath_cfg
)
447 uint16_t maxpaths
, mpath_count
, old_mpath_count
;
448 struct listnode
*mp_node
, *mp_next_node
;
449 struct bgp_path_info
*cur_mpath
, *new_mpath
, *next_mpath
, *prev_mpath
;
450 int mpath_changed
, debug
;
451 char pfx_buf
[PREFIX2STR_BUFFER
], nh_buf
[2][INET6_ADDRSTRLEN
];
452 char path_buf
[PATH_ADDPATH_STR_BUFFER
];
455 maxpaths
= multipath_num
;
459 prev_mpath
= new_best
;
460 mp_node
= listhead(mp_list
);
461 debug
= bgp_debug_bestpath(&rn
->p
);
464 prefix2str(&rn
->p
, pfx_buf
, sizeof(pfx_buf
));
468 if (new_best
!= old_best
)
469 bgp_path_info_mpath_dequeue(new_best
);
470 maxpaths
= (new_best
->peer
->sort
== BGP_PEER_IBGP
)
471 ? mpath_cfg
->maxpaths_ibgp
472 : mpath_cfg
->maxpaths_ebgp
;
476 cur_mpath
= bgp_path_info_mpath_first(old_best
);
477 old_mpath_count
= bgp_path_info_mpath_count(old_best
);
478 bgp_path_info_mpath_count_set(old_best
, 0);
479 bgp_path_info_mpath_dequeue(old_best
);
484 "%s: starting mpath update, newbest %s num candidates %d old-mpath-count %d",
485 pfx_buf
, new_best
? new_best
->peer
->host
: "NONE",
486 mp_list
? listcount(mp_list
) : 0, old_mpath_count
);
489 * We perform an ordered walk through both lists in parallel.
490 * The reason for the ordered walk is that if there are paths
491 * that were previously multipaths and are still multipaths, the walk
492 * should encounter them in both lists at the same time. Otherwise
493 * there will be paths that are in one list or another, and we
494 * will deal with these separately.
496 * Note that new_best might be somewhere in the mp_list, so we need
499 while (mp_node
|| cur_mpath
) {
500 struct bgp_path_info
*tmp_info
;
503 * We can bail out of this loop if all existing paths on the
504 * multipath list have been visited (for cleanup purposes) and
505 * the maxpath requirement is fulfulled
507 if (!cur_mpath
&& (mpath_count
>= maxpaths
))
510 mp_next_node
= mp_node
? listnextnode(mp_node
) : NULL
;
512 cur_mpath
? bgp_path_info_mpath_next(cur_mpath
) : NULL
;
513 tmp_info
= mp_node
? listgetdata(mp_node
) : NULL
;
517 "%s: comparing candidate %s with existing mpath %s",
519 tmp_info
? tmp_info
->peer
->host
: "NONE",
520 cur_mpath
? cur_mpath
->peer
->host
: "NONE");
523 * If equal, the path was a multipath and is still a multipath.
524 * Insert onto new multipath list if maxpaths allows.
526 if (mp_node
&& (listgetdata(mp_node
) == cur_mpath
)) {
527 list_delete_node(mp_list
, mp_node
);
528 bgp_path_info_mpath_dequeue(cur_mpath
);
529 if ((mpath_count
< maxpaths
)
531 && bgp_path_info_nexthop_cmp(prev_mpath
,
533 bgp_path_info_mpath_enqueue(prev_mpath
,
535 prev_mpath
= cur_mpath
;
538 bgp_path_info_path_with_addpath_rx_str(
539 cur_mpath
, path_buf
);
541 "%s: %s is still multipath, cur count %d",
542 pfx_buf
, path_buf
, mpath_count
);
547 bgp_path_info_path_with_addpath_rx_str(
548 cur_mpath
, path_buf
);
550 "%s: remove mpath %s nexthop %s, cur count %d",
560 mp_node
= mp_next_node
;
561 cur_mpath
= next_mpath
;
567 || (bgp_path_info_mpath_cmp(cur_mpath
,
568 listgetdata(mp_node
))
571 * If here, we have an old multipath and either the
573 * is finished or the next mp_node points to a later
574 * multipath, so we need to purge this path from the
577 bgp_path_info_mpath_dequeue(cur_mpath
);
580 bgp_path_info_path_with_addpath_rx_str(
581 cur_mpath
, path_buf
);
583 "%s: remove mpath %s nexthop %s, cur count %d",
586 &cur_mpath
->attr
->nexthop
,
587 nh_buf
[0], sizeof(nh_buf
[0])),
590 cur_mpath
= next_mpath
;
593 * If here, we have a path on the mp_list that was not
595 * a multipath (due to non-equivalance or maxpaths
597 * or the matching multipath is sorted later in the
599 * list. Before we enqueue the path on the new multipath
601 * make sure its not on the old_best multipath list or
604 * - If next_mpath points to this new path, update
606 * point to the multipath after this one
607 * - Dequeue the path from the multipath list just to
610 new_mpath
= listgetdata(mp_node
);
611 list_delete_node(mp_list
, mp_node
);
614 if ((mpath_count
< maxpaths
) && (new_mpath
!= new_best
)
615 && bgp_path_info_nexthop_cmp(prev_mpath
,
617 if (new_mpath
== next_mpath
)
618 bgp_path_info_mpath_next(new_mpath
);
619 bgp_path_info_mpath_dequeue(new_mpath
);
621 bgp_path_info_mpath_enqueue(prev_mpath
,
623 prev_mpath
= new_mpath
;
627 bgp_path_info_path_with_addpath_rx_str(
628 new_mpath
, path_buf
);
630 "%s: add mpath %s nexthop %s, cur count %d",
640 mp_node
= mp_next_node
;
647 "%s: New mpath count (incl newbest) %d mpath-change %s",
648 pfx_buf
, mpath_count
,
649 mpath_changed
? "YES" : "NO");
651 bgp_path_info_mpath_count_set(new_best
, mpath_count
- 1);
653 || (bgp_path_info_mpath_count(new_best
) != old_mpath_count
))
654 SET_FLAG(new_best
->flags
, BGP_PATH_MULTIPATH_CHG
);
659 * bgp_mp_dmed_deselect
661 * Clean up multipath information for BGP_PATH_DMED_SELECTED path that
662 * is not selected as best path
664 void bgp_mp_dmed_deselect(struct bgp_path_info
*dmed_best
)
666 struct bgp_path_info
*mpinfo
, *mpnext
;
671 for (mpinfo
= bgp_path_info_mpath_first(dmed_best
); mpinfo
;
673 mpnext
= bgp_path_info_mpath_next(mpinfo
);
674 bgp_path_info_mpath_dequeue(mpinfo
);
677 bgp_path_info_mpath_count_set(dmed_best
, 0);
678 UNSET_FLAG(dmed_best
->flags
, BGP_PATH_MULTIPATH_CHG
);
679 assert(bgp_path_info_mpath_first(dmed_best
) == NULL
);
683 * bgp_path_info_mpath_aggregate_update
685 * Set the multipath aggregate attribute. We need to see if the
686 * aggregate has changed and then set the ATTR_CHANGED flag on the
687 * bestpath info so that a peer update will be generated. The
688 * change is detected by generating the current attribute,
689 * interning it, and then comparing the interned pointer with the
690 * current value. We can skip this generate/compare step if there
691 * is no change in multipath selection and no attribute change in
694 void bgp_path_info_mpath_aggregate_update(struct bgp_path_info
*new_best
,
695 struct bgp_path_info
*old_best
)
697 struct bgp_path_info
*mpinfo
;
698 struct aspath
*aspath
;
699 struct aspath
*asmerge
;
700 struct attr
*new_attr
, *old_attr
;
702 struct community
*community
, *commerge
;
703 struct ecommunity
*ecomm
, *ecommerge
;
704 struct lcommunity
*lcomm
, *lcommerge
;
705 struct attr attr
= {0};
707 if (old_best
&& (old_best
!= new_best
)
708 && (old_attr
= bgp_path_info_mpath_attr(old_best
))) {
709 bgp_attr_unintern(&old_attr
);
710 bgp_path_info_mpath_attr_set(old_best
, NULL
);
716 if (!bgp_path_info_mpath_count(new_best
)) {
717 if ((new_attr
= bgp_path_info_mpath_attr(new_best
))) {
718 bgp_attr_unintern(&new_attr
);
719 bgp_path_info_mpath_attr_set(new_best
, NULL
);
720 SET_FLAG(new_best
->flags
, BGP_PATH_ATTR_CHANGED
);
725 bgp_attr_dup(&attr
, new_best
->attr
);
727 if (new_best
->peer
&& bgp_flag_check(new_best
->peer
->bgp
,
728 BGP_FLAG_MULTIPATH_RELAX_AS_SET
)) {
730 /* aggregate attribute from multipath constituents */
731 aspath
= aspath_dup(attr
.aspath
);
732 origin
= attr
.origin
;
734 attr
.community
? community_dup(attr
.community
) : NULL
;
735 ecomm
= (attr
.ecommunity
) ? ecommunity_dup(attr
.ecommunity
)
737 lcomm
= (attr
.lcommunity
) ? lcommunity_dup(attr
.lcommunity
)
740 for (mpinfo
= bgp_path_info_mpath_first(new_best
); mpinfo
;
741 mpinfo
= bgp_path_info_mpath_next(mpinfo
)) {
743 aspath_aggregate(aspath
, mpinfo
->attr
->aspath
);
747 if (origin
< mpinfo
->attr
->origin
)
748 origin
= mpinfo
->attr
->origin
;
750 if (mpinfo
->attr
->community
) {
752 commerge
= community_merge(
754 mpinfo
->attr
->community
);
756 community_uniq_sort(commerge
);
757 community_free(&commerge
);
759 community
= community_dup(
760 mpinfo
->attr
->community
);
763 if (mpinfo
->attr
->ecommunity
) {
765 ecommerge
= ecommunity_merge(
767 mpinfo
->attr
->ecommunity
);
768 ecomm
= ecommunity_uniq_sort(ecommerge
);
769 ecommunity_free(&ecommerge
);
771 ecomm
= ecommunity_dup(
772 mpinfo
->attr
->ecommunity
);
774 if (mpinfo
->attr
->lcommunity
) {
776 lcommerge
= lcommunity_merge(
778 mpinfo
->attr
->lcommunity
);
779 lcomm
= lcommunity_uniq_sort(lcommerge
);
780 lcommunity_free(&lcommerge
);
782 lcomm
= lcommunity_dup(
783 mpinfo
->attr
->lcommunity
);
787 attr
.aspath
= aspath
;
788 attr
.origin
= origin
;
790 attr
.community
= community
;
791 attr
.flag
|= ATTR_FLAG_BIT(BGP_ATTR_COMMUNITIES
);
794 attr
.ecommunity
= ecomm
;
795 attr
.flag
|= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES
);
798 attr
.lcommunity
= lcomm
;
799 attr
.flag
|= ATTR_FLAG_BIT(BGP_ATTR_LARGE_COMMUNITIES
);
802 /* Zap multipath attr nexthop so we set nexthop to self */
803 attr
.nexthop
.s_addr
= 0;
804 memset(&attr
.mp_nexthop_global
, 0, sizeof(struct in6_addr
));
806 /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */
809 new_attr
= bgp_attr_intern(&attr
);
811 if (new_attr
!= bgp_path_info_mpath_attr(new_best
)) {
812 if ((old_attr
= bgp_path_info_mpath_attr(new_best
)))
813 bgp_attr_unintern(&old_attr
);
814 bgp_path_info_mpath_attr_set(new_best
, new_attr
);
815 SET_FLAG(new_best
->flags
, BGP_PATH_ATTR_CHANGED
);
817 bgp_attr_unintern(&new_attr
);