]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_mpath.c
bgpd: For deterministic MED build a multipath set for each peer AS as the
[mirror_frr.git] / bgpd / bgp_mpath.c
CommitLineData
165b5fff
JB
1/* $QuaggaId: Format:%an, %ai, %h$ $
2 *
3 * BGP Multipath
4 * Copyright (C) 2010 Google Inc.
5 *
6 * This file is part of Quagga
7 *
8 * Quagga is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * Quagga is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with Quagga; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23
24#include <zebra.h>
25
26#include "command.h"
96450faf
JB
27#include "prefix.h"
28#include "linklist.h"
29#include "sockunion.h"
de8d5dff 30#include "memory.h"
165b5fff
JB
31
32#include "bgpd/bgpd.h"
96450faf
JB
33#include "bgpd/bgp_table.h"
34#include "bgpd/bgp_route.h"
35#include "bgpd/bgp_attr.h"
de8d5dff 36#include "bgpd/bgp_debug.h"
165b5fff
JB
37#include "bgpd/bgp_mpath.h"
38
39/*
40 * bgp_maximum_paths_set
41 *
42 * Record maximum-paths configuration for BGP instance
43 */
44int
45bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi,
46 int peertype, u_int16_t maxpaths)
47{
48 if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
49 return -1;
50
51 switch (peertype)
52 {
53 case BGP_PEER_IBGP:
54 bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths;
55 break;
56 case BGP_PEER_EBGP:
57 bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths;
58 break;
59 default:
60 return -1;
61 }
62
63 return 0;
64}
65
66/*
67 * bgp_maximum_paths_unset
68 *
69 * Remove maximum-paths configuration from BGP instance
70 */
71int
72bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi,
73 int peertype)
74{
75 if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
76 return -1;
77
78 switch (peertype)
79 {
80 case BGP_PEER_IBGP:
81 bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
82 break;
83 case BGP_PEER_EBGP:
84 bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
85 break;
86 default:
87 return -1;
88 }
89
90 return 0;
91}
96450faf
JB
92
93/*
94 * bgp_info_nexthop_cmp
95 *
96 * Compare the nexthops of two paths. Return value is less than, equal to,
97 * or greater than zero if bi1 is respectively less than, equal to,
98 * or greater than bi2.
99 */
100static int
101bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2)
102{
103 struct attr_extra *ae1, *ae2;
104 int compare;
105
106 ae1 = bgp_attr_extra_get (bi1->attr);
107 ae2 = bgp_attr_extra_get (bi2->attr);
108
109 compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop);
110
111 if (!compare && ae1 && ae2 && (ae1->mp_nexthop_len == ae2->mp_nexthop_len))
112 {
113 switch (ae1->mp_nexthop_len)
114 {
115 case 4:
116 case 12:
117 compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in,
118 &ae2->mp_nexthop_global_in);
119 break;
120#ifdef HAVE_IPV6
121 case 16:
122 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
123 &ae2->mp_nexthop_global);
124 break;
125 case 32:
126 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
127 &ae2->mp_nexthop_global);
128 if (!compare)
129 compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local,
130 &ae2->mp_nexthop_local);
131 break;
132#endif /* HAVE_IPV6 */
133 }
134 }
135
136 return compare;
137}
138
139/*
140 * bgp_info_mpath_cmp
141 *
142 * This function determines our multipath list ordering. By ordering
143 * the list we can deterministically select which paths are included
144 * in the multipath set. The ordering also helps in detecting changes
145 * in the multipath selection so we can detect whether to send an
146 * update to zebra.
147 *
148 * The order of paths is determined first by received nexthop, and then
149 * by peer address if the nexthops are the same.
150 */
151static int
152bgp_info_mpath_cmp (void *val1, void *val2)
153{
154 struct bgp_info *bi1, *bi2;
155 int compare;
156
157 bi1 = val1;
158 bi2 = val2;
159
160 compare = bgp_info_nexthop_cmp (bi1, bi2);
161
162 if (!compare)
163 compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote);
164
165 return compare;
166}
167
168/*
169 * bgp_mp_list_init
170 *
171 * Initialize the mp_list, which holds the list of multipaths
172 * selected by bgp_best_selection
173 */
174void
175bgp_mp_list_init (struct list *mp_list)
176{
177 assert (mp_list);
178 memset (mp_list, 0, sizeof (struct list));
179 mp_list->cmp = bgp_info_mpath_cmp;
180}
181
182/*
183 * bgp_mp_list_clear
184 *
185 * Clears all entries out of the mp_list
186 */
187void
188bgp_mp_list_clear (struct list *mp_list)
189{
190 assert (mp_list);
191 list_delete_all_node (mp_list);
192}
193
194/*
195 * bgp_mp_list_add
196 *
197 * Adds a multipath entry to the mp_list
198 */
199void
200bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo)
201{
202 assert (mp_list && mpinfo);
203 listnode_add_sort (mp_list, mpinfo);
204}
de8d5dff
JB
205
206/*
207 * bgp_info_mpath_new
208 *
209 * Allocate and zero memory for a new bgp_info_mpath element
210 */
211static struct bgp_info_mpath *
212bgp_info_mpath_new (void)
213{
214 struct bgp_info_mpath *new_mpath;
215 new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath));
216 return new_mpath;
217}
218
219/*
220 * bgp_info_mpath_free
221 *
222 * Release resources for a bgp_info_mpath element and zero out pointer
223 */
224void
225bgp_info_mpath_free (struct bgp_info_mpath **mpath)
226{
227 if (mpath && *mpath)
228 {
229 XFREE (MTYPE_BGP_MPATH_INFO, *mpath);
230 *mpath = NULL;
231 }
232}
233
234/*
235 * bgp_info_mpath_get
236 *
237 * Fetch the mpath element for the given bgp_info. Used for
238 * doing lazy allocation.
239 */
240static struct bgp_info_mpath *
241bgp_info_mpath_get (struct bgp_info *binfo)
242{
243 struct bgp_info_mpath *mpath;
244 if (!binfo->mpath)
245 {
246 mpath = bgp_info_mpath_new();
247 if (!mpath)
248 return NULL;
249 binfo->mpath = mpath;
250 mpath->mp_info = binfo;
251 }
252 return binfo->mpath;
253}
254
255/*
256 * bgp_info_mpath_enqueue
257 *
258 * Enqueue a path onto the multipath list given the previous multipath
259 * list entry
260 */
261static void
262bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo)
263{
264 struct bgp_info_mpath *prev, *mpath;
265
266 prev = bgp_info_mpath_get (prev_info);
267 mpath = bgp_info_mpath_get (binfo);
268 if (!prev || !mpath)
269 return;
270
271 mpath->mp_next = prev->mp_next;
272 mpath->mp_prev = prev;
273 if (prev->mp_next)
274 prev->mp_next->mp_prev = mpath;
275 prev->mp_next = mpath;
276
277 SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
278}
279
280/*
281 * bgp_info_mpath_dequeue
282 *
283 * Remove a path from the multipath list
284 */
285void
286bgp_info_mpath_dequeue (struct bgp_info *binfo)
287{
288 struct bgp_info_mpath *mpath = binfo->mpath;
289 if (!mpath)
290 return;
291 if (mpath->mp_prev)
292 mpath->mp_prev->mp_next = mpath->mp_next;
293 if (mpath->mp_next)
294 mpath->mp_next->mp_prev = mpath->mp_prev;
295 mpath->mp_next = mpath->mp_prev = NULL;
296 UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
297}
298
299/*
300 * bgp_info_mpath_next
301 *
302 * Given a bgp_info, return the next multipath entry
303 */
304struct bgp_info *
305bgp_info_mpath_next (struct bgp_info *binfo)
306{
307 if (!binfo->mpath || !binfo->mpath->mp_next)
308 return NULL;
309 return binfo->mpath->mp_next->mp_info;
310}
311
312/*
313 * bgp_info_mpath_first
314 *
315 * Given bestpath bgp_info, return the first multipath entry.
316 */
317struct bgp_info *
318bgp_info_mpath_first (struct bgp_info *binfo)
319{
320 return bgp_info_mpath_next (binfo);
321}
322
323/*
324 * bgp_info_mpath_count
325 *
326 * Given the bestpath bgp_info, return the number of multipath entries
327 */
328u_int32_t
329bgp_info_mpath_count (struct bgp_info *binfo)
330{
331 if (!binfo->mpath)
332 return 0;
333 return binfo->mpath->mp_count;
334}
335
336/*
337 * bgp_info_mpath_count_set
338 *
339 * Sets the count of multipaths into bestpath's mpath element
340 */
341static void
342bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count)
343{
344 struct bgp_info_mpath *mpath;
345 if (!count && !binfo->mpath)
346 return;
347 mpath = bgp_info_mpath_get (binfo);
348 if (!mpath)
349 return;
350 mpath->mp_count = count;
351}
352
353/*
354 * bgp_info_mpath_update
355 *
356 * Compare and sync up the multipath list with the mp_list generated by
357 * bgp_best_selection
358 */
359void
360bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best,
361 struct bgp_info *old_best, struct list *mp_list,
362 struct bgp_maxpaths_cfg *mpath_cfg)
363{
364 u_int16_t maxpaths, mpath_count, old_mpath_count;
365 struct listnode *mp_node, *mp_next_node;
366 struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath;
367 int mpath_changed, debug;
368 char pfx_buf[INET_ADDRSTRLEN], nh_buf[2][INET_ADDRSTRLEN];
369
370 mpath_changed = 0;
371 maxpaths = BGP_DEFAULT_MAXPATHS;
372 mpath_count = 0;
373 cur_mpath = NULL;
374 old_mpath_count = 0;
375 prev_mpath = new_best;
376 mp_node = listhead (mp_list);
377 debug = BGP_DEBUG (events, EVENTS);
378
379 if (debug)
380 prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf));
381
382 if (new_best)
383 {
384 mpath_count++;
385 if (new_best != old_best)
386 bgp_info_mpath_dequeue (new_best);
387 maxpaths = (peer_sort (new_best->peer) == BGP_PEER_IBGP) ?
388 mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp;
389 }
390
391 if (old_best)
392 {
393 cur_mpath = bgp_info_mpath_first (old_best);
394 old_mpath_count = bgp_info_mpath_count (old_best);
395 bgp_info_mpath_count_set (old_best, 0);
396 bgp_info_mpath_dequeue (old_best);
397 }
398
399 /*
400 * We perform an ordered walk through both lists in parallel.
401 * The reason for the ordered walk is that if there are paths
402 * that were previously multipaths and are still multipaths, the walk
403 * should encounter them in both lists at the same time. Otherwise
404 * there will be paths that are in one list or another, and we
405 * will deal with these separately.
406 *
407 * Note that new_best might be somewhere in the mp_list, so we need
408 * to skip over it
409 */
410 while (mp_node || cur_mpath)
411 {
412 /*
413 * We can bail out of this loop if all existing paths on the
414 * multipath list have been visited (for cleanup purposes) and
415 * the maxpath requirement is fulfulled
416 */
417 if (!cur_mpath && (mpath_count >= maxpaths))
418 break;
419
420 mp_next_node = mp_node ? listnextnode (mp_node) : NULL;
421 next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL;
422
423 /*
424 * If equal, the path was a multipath and is still a multipath.
425 * Insert onto new multipath list if maxpaths allows.
426 */
427 if (mp_node && (listgetdata (mp_node) == cur_mpath))
428 {
429 list_delete_node (mp_list, mp_node);
430 bgp_info_mpath_dequeue (cur_mpath);
431 if ((mpath_count < maxpaths) &&
432 bgp_info_nexthop_cmp (prev_mpath, cur_mpath))
433 {
434 bgp_info_mpath_enqueue (prev_mpath, cur_mpath);
435 prev_mpath = cur_mpath;
436 mpath_count++;
437 }
438 else
439 {
440 mpath_changed = 1;
441 if (debug)
442 zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
443 inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
444 nh_buf[0], sizeof (nh_buf[0])),
445 sockunion2str (cur_mpath->peer->su_remote,
446 nh_buf[1], sizeof (nh_buf[1])));
447 }
448 mp_node = mp_next_node;
449 cur_mpath = next_mpath;
450 continue;
451 }
452
453 if (cur_mpath && (!mp_node ||
454 (bgp_info_mpath_cmp (cur_mpath,
455 listgetdata (mp_node)) < 0)))
456 {
457 /*
458 * If here, we have an old multipath and either the mp_list
459 * is finished or the next mp_node points to a later
460 * multipath, so we need to purge this path from the
461 * multipath list
462 */
463 bgp_info_mpath_dequeue (cur_mpath);
464 mpath_changed = 1;
465 if (debug)
466 zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
467 inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
468 nh_buf[0], sizeof (nh_buf[0])),
469 sockunion2str (cur_mpath->peer->su_remote,
470 nh_buf[1], sizeof (nh_buf[1])));
471 cur_mpath = next_mpath;
472 }
473 else
474 {
475 /*
476 * If here, we have a path on the mp_list that was not previously
477 * a multipath (due to non-equivalance or maxpaths exceeded),
478 * or the matching multipath is sorted later in the multipath
479 * list. Before we enqueue the path on the new multipath list,
480 * make sure its not on the old_best multipath list or referenced
481 * via next_mpath:
482 * - If next_mpath points to this new path, update next_mpath to
483 * point to the multipath after this one
484 * - Dequeue the path from the multipath list just to make sure
485 */
486 new_mpath = listgetdata (mp_node);
487 list_delete_node (mp_list, mp_node);
488 if (new_mpath == next_mpath)
489 next_mpath = bgp_info_mpath_next (new_mpath);
490 bgp_info_mpath_dequeue (new_mpath);
491 if ((mpath_count < maxpaths) && (new_mpath != new_best) &&
492 bgp_info_nexthop_cmp (prev_mpath, new_mpath))
493 {
494 bgp_info_mpath_enqueue (prev_mpath, new_mpath);
495 prev_mpath = new_mpath;
496 mpath_changed = 1;
497 mpath_count++;
498 if (debug)
499 zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf,
500 inet_ntop (AF_INET, &new_mpath->attr->nexthop,
501 nh_buf[0], sizeof (nh_buf[0])),
502 sockunion2str (new_mpath->peer->su_remote,
503 nh_buf[1], sizeof (nh_buf[1])));
504 }
505 mp_node = mp_next_node;
506 }
507 }
508
509 if (new_best)
510 {
511 bgp_info_mpath_count_set (new_best, mpath_count-1);
512 if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count))
513 SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG);
514 }
515}
6918e74b
JB
516
517/*
518 * bgp_mp_dmed_deselect
519 *
520 * Clean up multipath information for BGP_INFO_DMED_SELECTED path that
521 * is not selected as best path
522 */
523void
524bgp_mp_dmed_deselect (struct bgp_info *dmed_best)
525{
526 struct bgp_info *mpinfo, *mpnext;
527
528 if (!dmed_best)
529 return;
530
531 for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext)
532 {
533 mpnext = bgp_info_mpath_next (mpinfo);
534 bgp_info_mpath_dequeue (mpinfo);
535 }
536
537 bgp_info_mpath_count_set (dmed_best, 0);
538 UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG);
539 assert (bgp_info_mpath_first (dmed_best) == 0);
540}