3 * Copyright (C) 2010 Google Inc.
5 * This file is part of Quagga
7 * Quagga is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
12 * Quagga is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; see the file COPYING; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "sockunion.h"
32 #include "bgpd/bgpd.h"
33 #include "bgpd/bgp_table.h"
34 #include "bgpd/bgp_route.h"
35 #include "bgpd/bgp_attr.h"
36 #include "bgpd/bgp_debug.h"
37 #include "bgpd/bgp_aspath.h"
38 #include "bgpd/bgp_community.h"
39 #include "bgpd/bgp_ecommunity.h"
40 #include "bgpd/bgp_lcommunity.h"
41 #include "bgpd/bgp_mpath.h"
44 * bgp_maximum_paths_set
46 * Record maximum-paths configuration for BGP instance
49 bgp_maximum_paths_set (struct bgp
*bgp
, afi_t afi
, safi_t safi
,
50 int peertype
, u_int16_t maxpaths
, u_int16_t options
)
52 if (!bgp
|| (afi
>= AFI_MAX
) || (safi
>= SAFI_MAX
))
58 bgp
->maxpaths
[afi
][safi
].maxpaths_ibgp
= maxpaths
;
59 bgp
->maxpaths
[afi
][safi
].ibgp_flags
|= options
;
62 bgp
->maxpaths
[afi
][safi
].maxpaths_ebgp
= maxpaths
;
72 * bgp_maximum_paths_unset
74 * Remove maximum-paths configuration from BGP instance
77 bgp_maximum_paths_unset (struct bgp
*bgp
, afi_t afi
, safi_t safi
,
80 if (!bgp
|| (afi
>= AFI_MAX
) || (safi
>= SAFI_MAX
))
86 bgp
->maxpaths
[afi
][safi
].maxpaths_ibgp
= multipath_num
;
87 bgp
->maxpaths
[afi
][safi
].ibgp_flags
= 0;
90 bgp
->maxpaths
[afi
][safi
].maxpaths_ebgp
= multipath_num
;
100 * bgp_info_nexthop_cmp
102 * Compare the nexthops of two paths. Return value is less than, equal to,
103 * or greater than zero if bi1 is respectively less than, equal to,
104 * or greater than bi2.
107 bgp_info_nexthop_cmp (struct bgp_info
*bi1
, struct bgp_info
*bi2
)
109 struct attr_extra
*ae1
, *ae2
;
112 ae1
= bi1
->attr
->extra
;
113 ae2
= bi2
->attr
->extra
;
115 compare
= IPV4_ADDR_CMP (&bi1
->attr
->nexthop
, &bi2
->attr
->nexthop
);
116 if (!compare
&& ae1
&& ae2
)
118 if (ae1
->mp_nexthop_len
== ae2
->mp_nexthop_len
)
120 switch (ae1
->mp_nexthop_len
)
122 case BGP_ATTR_NHLEN_IPV4
:
123 case BGP_ATTR_NHLEN_VPNV4
:
124 compare
= IPV4_ADDR_CMP (&ae1
->mp_nexthop_global_in
,
125 &ae2
->mp_nexthop_global_in
);
127 case BGP_ATTR_NHLEN_IPV6_GLOBAL
:
128 case BGP_ATTR_NHLEN_VPNV6_GLOBAL
:
129 compare
= IPV6_ADDR_CMP (&ae1
->mp_nexthop_global
,
130 &ae2
->mp_nexthop_global
);
132 case BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
:
133 compare
= IPV6_ADDR_CMP (&ae1
->mp_nexthop_global
,
134 &ae2
->mp_nexthop_global
);
136 compare
= IPV6_ADDR_CMP (&ae1
->mp_nexthop_local
,
137 &ae2
->mp_nexthop_local
);
142 /* This can happen if one IPv6 peer sends you global and link-local
143 * nexthops but another IPv6 peer only sends you global
145 else if (ae1
->mp_nexthop_len
== BGP_ATTR_NHLEN_IPV6_GLOBAL
||
146 ae1
->mp_nexthop_len
== BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
)
148 compare
= IPV6_ADDR_CMP (&ae1
->mp_nexthop_global
,
149 &ae2
->mp_nexthop_global
);
152 if (ae1
->mp_nexthop_len
< ae2
->mp_nexthop_len
)
166 * This function determines our multipath list ordering. By ordering
167 * the list we can deterministically select which paths are included
168 * in the multipath set. The ordering also helps in detecting changes
169 * in the multipath selection so we can detect whether to send an
172 * The order of paths is determined first by received nexthop, and then
173 * by peer address if the nexthops are the same.
176 bgp_info_mpath_cmp (void *val1
, void *val2
)
178 struct bgp_info
*bi1
, *bi2
;
184 compare
= bgp_info_nexthop_cmp (bi1
, bi2
);
188 if (!bi1
->peer
->su_remote
&& !bi2
->peer
->su_remote
)
190 else if (!bi1
->peer
->su_remote
)
192 else if (!bi2
->peer
->su_remote
)
195 compare
= sockunion_cmp (bi1
->peer
->su_remote
, bi2
->peer
->su_remote
);
204 * Initialize the mp_list, which holds the list of multipaths
205 * selected by bgp_best_selection
208 bgp_mp_list_init (struct list
*mp_list
)
211 memset (mp_list
, 0, sizeof (struct list
));
212 mp_list
->cmp
= bgp_info_mpath_cmp
;
218 * Clears all entries out of the mp_list
221 bgp_mp_list_clear (struct list
*mp_list
)
224 list_delete_all_node (mp_list
);
230 * Adds a multipath entry to the mp_list
233 bgp_mp_list_add (struct list
*mp_list
, struct bgp_info
*mpinfo
)
235 assert (mp_list
&& mpinfo
);
236 listnode_add_sort (mp_list
, mpinfo
);
242 * Allocate and zero memory for a new bgp_info_mpath element
244 static struct bgp_info_mpath
*
245 bgp_info_mpath_new (void)
247 struct bgp_info_mpath
*new_mpath
;
248 new_mpath
= XCALLOC (MTYPE_BGP_MPATH_INFO
, sizeof (struct bgp_info_mpath
));
253 * bgp_info_mpath_free
255 * Release resources for a bgp_info_mpath element and zero out pointer
258 bgp_info_mpath_free (struct bgp_info_mpath
**mpath
)
262 if ((*mpath
)->mp_attr
)
263 bgp_attr_unintern (&(*mpath
)->mp_attr
);
264 XFREE (MTYPE_BGP_MPATH_INFO
, *mpath
);
272 * Fetch the mpath element for the given bgp_info. Used for
273 * doing lazy allocation.
275 static struct bgp_info_mpath
*
276 bgp_info_mpath_get (struct bgp_info
*binfo
)
278 struct bgp_info_mpath
*mpath
;
281 mpath
= bgp_info_mpath_new();
284 binfo
->mpath
= mpath
;
285 mpath
->mp_info
= binfo
;
291 * bgp_info_mpath_enqueue
293 * Enqueue a path onto the multipath list given the previous multipath
297 bgp_info_mpath_enqueue (struct bgp_info
*prev_info
, struct bgp_info
*binfo
)
299 struct bgp_info_mpath
*prev
, *mpath
;
301 prev
= bgp_info_mpath_get (prev_info
);
302 mpath
= bgp_info_mpath_get (binfo
);
306 mpath
->mp_next
= prev
->mp_next
;
307 mpath
->mp_prev
= prev
;
309 prev
->mp_next
->mp_prev
= mpath
;
310 prev
->mp_next
= mpath
;
312 SET_FLAG (binfo
->flags
, BGP_INFO_MULTIPATH
);
316 * bgp_info_mpath_dequeue
318 * Remove a path from the multipath list
321 bgp_info_mpath_dequeue (struct bgp_info
*binfo
)
323 struct bgp_info_mpath
*mpath
= binfo
->mpath
;
327 mpath
->mp_prev
->mp_next
= mpath
->mp_next
;
329 mpath
->mp_next
->mp_prev
= mpath
->mp_prev
;
330 mpath
->mp_next
= mpath
->mp_prev
= NULL
;
331 UNSET_FLAG (binfo
->flags
, BGP_INFO_MULTIPATH
);
335 * bgp_info_mpath_next
337 * Given a bgp_info, return the next multipath entry
340 bgp_info_mpath_next (struct bgp_info
*binfo
)
342 if (!binfo
->mpath
|| !binfo
->mpath
->mp_next
)
344 return binfo
->mpath
->mp_next
->mp_info
;
348 * bgp_info_mpath_first
350 * Given bestpath bgp_info, return the first multipath entry.
353 bgp_info_mpath_first (struct bgp_info
*binfo
)
355 return bgp_info_mpath_next (binfo
);
359 * bgp_info_mpath_count
361 * Given the bestpath bgp_info, return the number of multipath entries
364 bgp_info_mpath_count (struct bgp_info
*binfo
)
368 return binfo
->mpath
->mp_count
;
372 * bgp_info_mpath_count_set
374 * Sets the count of multipaths into bestpath's mpath element
377 bgp_info_mpath_count_set (struct bgp_info
*binfo
, u_int32_t count
)
379 struct bgp_info_mpath
*mpath
;
380 if (!count
&& !binfo
->mpath
)
382 mpath
= bgp_info_mpath_get (binfo
);
385 mpath
->mp_count
= count
;
389 * bgp_info_mpath_attr
391 * Given bestpath bgp_info, return aggregated attribute set used
392 * for advertising the multipath route
395 bgp_info_mpath_attr (struct bgp_info
*binfo
)
399 return binfo
->mpath
->mp_attr
;
403 * bgp_info_mpath_attr_set
405 * Sets the aggregated attribute into bestpath's mpath element
408 bgp_info_mpath_attr_set (struct bgp_info
*binfo
, struct attr
*attr
)
410 struct bgp_info_mpath
*mpath
;
411 if (!attr
&& !binfo
->mpath
)
413 mpath
= bgp_info_mpath_get (binfo
);
416 mpath
->mp_attr
= attr
;
420 * bgp_info_mpath_update
422 * Compare and sync up the multipath list with the mp_list generated by
426 bgp_info_mpath_update (struct bgp_node
*rn
, struct bgp_info
*new_best
,
427 struct bgp_info
*old_best
, struct list
*mp_list
,
428 struct bgp_maxpaths_cfg
*mpath_cfg
)
430 u_int16_t maxpaths
, mpath_count
, old_mpath_count
;
431 struct listnode
*mp_node
, *mp_next_node
;
432 struct bgp_info
*cur_mpath
, *new_mpath
, *next_mpath
, *prev_mpath
;
433 int mpath_changed
, debug
;
434 char pfx_buf
[PREFIX2STR_BUFFER
], nh_buf
[2][INET6_ADDRSTRLEN
];
435 char path_buf
[PATH_ADDPATH_STR_BUFFER
];
438 maxpaths
= multipath_num
;
442 prev_mpath
= new_best
;
443 mp_node
= listhead (mp_list
);
444 debug
= bgp_debug_bestpath(&rn
->p
);
447 prefix2str (&rn
->p
, pfx_buf
, sizeof (pfx_buf
));
452 if (new_best
!= old_best
)
453 bgp_info_mpath_dequeue (new_best
);
454 maxpaths
= (new_best
->peer
->sort
== BGP_PEER_IBGP
) ?
455 mpath_cfg
->maxpaths_ibgp
: mpath_cfg
->maxpaths_ebgp
;
460 cur_mpath
= bgp_info_mpath_first (old_best
);
461 old_mpath_count
= bgp_info_mpath_count (old_best
);
462 bgp_info_mpath_count_set (old_best
, 0);
463 bgp_info_mpath_dequeue (old_best
);
467 zlog_debug("%s: starting mpath update, newbest %s num candidates %d old-mpath-count %d",
468 pfx_buf
, new_best
? new_best
->peer
->host
: "NONE",
469 listcount (mp_list
), old_mpath_count
);
472 * We perform an ordered walk through both lists in parallel.
473 * The reason for the ordered walk is that if there are paths
474 * that were previously multipaths and are still multipaths, the walk
475 * should encounter them in both lists at the same time. Otherwise
476 * there will be paths that are in one list or another, and we
477 * will deal with these separately.
479 * Note that new_best might be somewhere in the mp_list, so we need
482 while (mp_node
|| cur_mpath
)
484 struct bgp_info
*tmp_info
;
487 * We can bail out of this loop if all existing paths on the
488 * multipath list have been visited (for cleanup purposes) and
489 * the maxpath requirement is fulfulled
491 if (!cur_mpath
&& (mpath_count
>= maxpaths
))
494 mp_next_node
= mp_node
? listnextnode (mp_node
) : NULL
;
495 next_mpath
= cur_mpath
? bgp_info_mpath_next (cur_mpath
) : NULL
;
496 tmp_info
= mp_node
? listgetdata (mp_node
) : NULL
;
499 zlog_debug("%s: comparing candidate %s with existing mpath %s",
500 pfx_buf
, tmp_info
? tmp_info
->peer
->host
: "NONE",
501 cur_mpath
? cur_mpath
->peer
->host
: "NONE");
504 * If equal, the path was a multipath and is still a multipath.
505 * Insert onto new multipath list if maxpaths allows.
507 if (mp_node
&& (listgetdata (mp_node
) == cur_mpath
))
509 list_delete_node (mp_list
, mp_node
);
510 bgp_info_mpath_dequeue (cur_mpath
);
511 if ((mpath_count
< maxpaths
) &&
512 bgp_info_nexthop_cmp (prev_mpath
, cur_mpath
))
514 bgp_info_mpath_enqueue (prev_mpath
, cur_mpath
);
515 prev_mpath
= cur_mpath
;
519 bgp_info_path_with_addpath_rx_str(cur_mpath
, path_buf
);
520 zlog_debug("%s: %s is still multipath, cur count %d",
521 pfx_buf
, path_buf
, mpath_count
);
529 bgp_info_path_with_addpath_rx_str(cur_mpath
, path_buf
);
530 zlog_debug ("%s: remove mpath %s nexthop %s, cur count %d",
532 inet_ntop (AF_INET
, &cur_mpath
->attr
->nexthop
,
533 nh_buf
[0], sizeof (nh_buf
[0])),
537 mp_node
= mp_next_node
;
538 cur_mpath
= next_mpath
;
542 if (cur_mpath
&& (!mp_node
||
543 (bgp_info_mpath_cmp (cur_mpath
,
544 listgetdata (mp_node
)) < 0)))
547 * If here, we have an old multipath and either the mp_list
548 * is finished or the next mp_node points to a later
549 * multipath, so we need to purge this path from the
552 bgp_info_mpath_dequeue (cur_mpath
);
556 bgp_info_path_with_addpath_rx_str(cur_mpath
, path_buf
);
557 zlog_debug ("%s: remove mpath %s nexthop %s, cur count %d",
559 inet_ntop (AF_INET
, &cur_mpath
->attr
->nexthop
,
560 nh_buf
[0], sizeof (nh_buf
[0])),
563 cur_mpath
= next_mpath
;
568 * If here, we have a path on the mp_list that was not previously
569 * a multipath (due to non-equivalance or maxpaths exceeded),
570 * or the matching multipath is sorted later in the multipath
571 * list. Before we enqueue the path on the new multipath list,
572 * make sure its not on the old_best multipath list or referenced
574 * - If next_mpath points to this new path, update next_mpath to
575 * point to the multipath after this one
576 * - Dequeue the path from the multipath list just to make sure
578 new_mpath
= listgetdata (mp_node
);
579 list_delete_node (mp_list
, mp_node
);
580 if ((mpath_count
< maxpaths
) && (new_mpath
!= new_best
) &&
581 bgp_info_nexthop_cmp (prev_mpath
, new_mpath
))
583 if (new_mpath
== next_mpath
)
584 bgp_info_mpath_next (new_mpath
);
585 bgp_info_mpath_dequeue (new_mpath
);
587 bgp_info_mpath_enqueue (prev_mpath
, new_mpath
);
588 prev_mpath
= new_mpath
;
593 bgp_info_path_with_addpath_rx_str(new_mpath
, path_buf
);
594 zlog_debug ("%s: add mpath %s nexthop %s, cur count %d",
596 inet_ntop (AF_INET
, &new_mpath
->attr
->nexthop
,
597 nh_buf
[0], sizeof (nh_buf
[0])),
601 mp_node
= mp_next_node
;
608 zlog_debug("%s: New mpath count (incl newbest) %d mpath-change %s",
609 pfx_buf
, mpath_count
, mpath_changed
? "YES" : "NO");
611 bgp_info_mpath_count_set (new_best
, mpath_count
-1);
612 if (mpath_changed
|| (bgp_info_mpath_count (new_best
) != old_mpath_count
))
613 SET_FLAG (new_best
->flags
, BGP_INFO_MULTIPATH_CHG
);
618 * bgp_mp_dmed_deselect
620 * Clean up multipath information for BGP_INFO_DMED_SELECTED path that
621 * is not selected as best path
624 bgp_mp_dmed_deselect (struct bgp_info
*dmed_best
)
626 struct bgp_info
*mpinfo
, *mpnext
;
631 for (mpinfo
= bgp_info_mpath_first (dmed_best
); mpinfo
; mpinfo
= mpnext
)
633 mpnext
= bgp_info_mpath_next (mpinfo
);
634 bgp_info_mpath_dequeue (mpinfo
);
637 bgp_info_mpath_count_set (dmed_best
, 0);
638 UNSET_FLAG (dmed_best
->flags
, BGP_INFO_MULTIPATH_CHG
);
639 assert (bgp_info_mpath_first (dmed_best
) == 0);
643 * bgp_info_mpath_aggregate_update
645 * Set the multipath aggregate attribute. We need to see if the
646 * aggregate has changed and then set the ATTR_CHANGED flag on the
647 * bestpath info so that a peer update will be generated. The
648 * change is detected by generating the current attribute,
649 * interning it, and then comparing the interned pointer with the
650 * current value. We can skip this generate/compare step if there
651 * is no change in multipath selection and no attribute change in
655 bgp_info_mpath_aggregate_update (struct bgp_info
*new_best
,
656 struct bgp_info
*old_best
)
658 struct bgp_info
*mpinfo
;
659 struct aspath
*aspath
;
660 struct aspath
*asmerge
;
661 struct attr
*new_attr
, *old_attr
;
663 struct community
*community
, *commerge
;
664 struct ecommunity
*ecomm
, *ecommerge
;
665 struct lcommunity
*lcomm
, *lcommerge
;
666 struct attr_extra
*ae
;
667 struct attr attr
= { 0 };
669 if (old_best
&& (old_best
!= new_best
) &&
670 (old_attr
= bgp_info_mpath_attr (old_best
)))
672 bgp_attr_unintern (&old_attr
);
673 bgp_info_mpath_attr_set (old_best
, NULL
);
679 if (!bgp_info_mpath_count (new_best
))
681 if ((new_attr
= bgp_info_mpath_attr (new_best
)))
683 bgp_attr_unintern (&new_attr
);
684 bgp_info_mpath_attr_set (new_best
, NULL
);
685 SET_FLAG (new_best
->flags
, BGP_INFO_ATTR_CHANGED
);
690 bgp_attr_dup (&attr
, new_best
->attr
);
692 if (new_best
->peer
&&
693 bgp_flag_check (new_best
->peer
->bgp
, BGP_FLAG_MULTIPATH_RELAX_AS_SET
))
696 /* aggregate attribute from multipath constituents */
697 aspath
= aspath_dup (attr
.aspath
);
698 origin
= attr
.origin
;
699 community
= attr
.community
? community_dup (attr
.community
) : NULL
;
701 ecomm
= (ae
&& ae
->ecommunity
) ? ecommunity_dup (ae
->ecommunity
) : NULL
;
702 lcomm
= (ae
&& ae
->lcommunity
) ? lcommunity_dup (ae
->lcommunity
) : NULL
;
704 for (mpinfo
= bgp_info_mpath_first (new_best
); mpinfo
;
705 mpinfo
= bgp_info_mpath_next (mpinfo
))
707 asmerge
= aspath_aggregate (aspath
, mpinfo
->attr
->aspath
);
708 aspath_free (aspath
);
711 if (origin
< mpinfo
->attr
->origin
)
712 origin
= mpinfo
->attr
->origin
;
714 if (mpinfo
->attr
->community
)
718 commerge
= community_merge (community
, mpinfo
->attr
->community
);
719 community
= community_uniq_sort (commerge
);
720 community_free (commerge
);
723 community
= community_dup (mpinfo
->attr
->community
);
726 ae
= mpinfo
->attr
->extra
;
727 if (ae
&& ae
->ecommunity
)
731 ecommerge
= ecommunity_merge (ecomm
, ae
->ecommunity
);
732 ecomm
= ecommunity_uniq_sort (ecommerge
);
733 ecommunity_free (&ecommerge
);
736 ecomm
= ecommunity_dup (ae
->ecommunity
);
738 if (ae
&& ae
->lcommunity
)
742 lcommerge
= lcommunity_merge (lcomm
, ae
->lcommunity
);
743 lcomm
= lcommunity_uniq_sort (lcommerge
);
744 lcommunity_free (&lcommerge
);
747 lcomm
= lcommunity_dup (ae
->lcommunity
);
751 attr
.aspath
= aspath
;
752 attr
.origin
= origin
;
755 attr
.community
= community
;
756 attr
.flag
|= ATTR_FLAG_BIT (BGP_ATTR_COMMUNITIES
);
760 ae
= bgp_attr_extra_get (&attr
);
761 ae
->ecommunity
= ecomm
;
762 attr
.flag
|= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES
);
765 /* Zap multipath attr nexthop so we set nexthop to self */
766 attr
.nexthop
.s_addr
= 0;
768 memset (&attr
.extra
->mp_nexthop_global
, 0, sizeof (struct in6_addr
));
770 /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */
773 new_attr
= bgp_attr_intern (&attr
);
774 bgp_attr_extra_free (&attr
);
776 if (new_attr
!= bgp_info_mpath_attr (new_best
))
778 if ((old_attr
= bgp_info_mpath_attr (new_best
)))
779 bgp_attr_unintern (&old_attr
);
780 bgp_info_mpath_attr_set (new_best
, new_attr
);
781 SET_FLAG (new_best
->flags
, BGP_INFO_ATTR_CHANGED
);
784 bgp_attr_unintern (&new_attr
);