]>
Commit | Line | Data |
---|---|---|
165b5fff JB |
1 | /* $QuaggaId: Format:%an, %ai, %h$ $ |
2 | * | |
3 | * BGP Multipath | |
4 | * Copyright (C) 2010 Google Inc. | |
5 | * | |
6 | * This file is part of Quagga | |
7 | * | |
8 | * Quagga is free software; you can redistribute it and/or modify it | |
9 | * under the terms of the GNU General Public License as published by the | |
10 | * Free Software Foundation; either version 2, or (at your option) any | |
11 | * later version. | |
12 | * | |
13 | * Quagga is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with Quagga; see the file COPYING. If not, write to the Free | |
20 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
21 | * 02111-1307, USA. | |
22 | */ | |
23 | ||
24 | #include <zebra.h> | |
25 | ||
26 | #include "command.h" | |
96450faf JB |
27 | #include "prefix.h" |
28 | #include "linklist.h" | |
29 | #include "sockunion.h" | |
de8d5dff | 30 | #include "memory.h" |
165b5fff JB |
31 | |
32 | #include "bgpd/bgpd.h" | |
96450faf JB |
33 | #include "bgpd/bgp_table.h" |
34 | #include "bgpd/bgp_route.h" | |
35 | #include "bgpd/bgp_attr.h" | |
de8d5dff | 36 | #include "bgpd/bgp_debug.h" |
165b5fff JB |
37 | #include "bgpd/bgp_mpath.h" |
38 | ||
39 | /* | |
40 | * bgp_maximum_paths_set | |
41 | * | |
42 | * Record maximum-paths configuration for BGP instance | |
43 | */ | |
44 | int | |
45 | bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi, | |
46 | int peertype, u_int16_t maxpaths) | |
47 | { | |
48 | if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX)) | |
49 | return -1; | |
50 | ||
51 | switch (peertype) | |
52 | { | |
53 | case BGP_PEER_IBGP: | |
54 | bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths; | |
55 | break; | |
56 | case BGP_PEER_EBGP: | |
57 | bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths; | |
58 | break; | |
59 | default: | |
60 | return -1; | |
61 | } | |
62 | ||
63 | return 0; | |
64 | } | |
65 | ||
66 | /* | |
67 | * bgp_maximum_paths_unset | |
68 | * | |
69 | * Remove maximum-paths configuration from BGP instance | |
70 | */ | |
71 | int | |
72 | bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi, | |
73 | int peertype) | |
74 | { | |
75 | if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX)) | |
76 | return -1; | |
77 | ||
78 | switch (peertype) | |
79 | { | |
80 | case BGP_PEER_IBGP: | |
81 | bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS; | |
82 | break; | |
83 | case BGP_PEER_EBGP: | |
84 | bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS; | |
85 | break; | |
86 | default: | |
87 | return -1; | |
88 | } | |
89 | ||
90 | return 0; | |
91 | } | |
96450faf JB |
92 | |
93 | /* | |
94 | * bgp_info_nexthop_cmp | |
95 | * | |
96 | * Compare the nexthops of two paths. Return value is less than, equal to, | |
97 | * or greater than zero if bi1 is respectively less than, equal to, | |
98 | * or greater than bi2. | |
99 | */ | |
100 | static int | |
101 | bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2) | |
102 | { | |
103 | struct attr_extra *ae1, *ae2; | |
104 | int compare; | |
105 | ||
106 | ae1 = bgp_attr_extra_get (bi1->attr); | |
107 | ae2 = bgp_attr_extra_get (bi2->attr); | |
108 | ||
109 | compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop); | |
110 | ||
111 | if (!compare && ae1 && ae2 && (ae1->mp_nexthop_len == ae2->mp_nexthop_len)) | |
112 | { | |
113 | switch (ae1->mp_nexthop_len) | |
114 | { | |
115 | case 4: | |
116 | case 12: | |
117 | compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in, | |
118 | &ae2->mp_nexthop_global_in); | |
119 | break; | |
120 | #ifdef HAVE_IPV6 | |
121 | case 16: | |
122 | compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global, | |
123 | &ae2->mp_nexthop_global); | |
124 | break; | |
125 | case 32: | |
126 | compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global, | |
127 | &ae2->mp_nexthop_global); | |
128 | if (!compare) | |
129 | compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local, | |
130 | &ae2->mp_nexthop_local); | |
131 | break; | |
132 | #endif /* HAVE_IPV6 */ | |
133 | } | |
134 | } | |
135 | ||
136 | return compare; | |
137 | } | |
138 | ||
139 | /* | |
140 | * bgp_info_mpath_cmp | |
141 | * | |
142 | * This function determines our multipath list ordering. By ordering | |
143 | * the list we can deterministically select which paths are included | |
144 | * in the multipath set. The ordering also helps in detecting changes | |
145 | * in the multipath selection so we can detect whether to send an | |
146 | * update to zebra. | |
147 | * | |
148 | * The order of paths is determined first by received nexthop, and then | |
149 | * by peer address if the nexthops are the same. | |
150 | */ | |
151 | static int | |
152 | bgp_info_mpath_cmp (void *val1, void *val2) | |
153 | { | |
154 | struct bgp_info *bi1, *bi2; | |
155 | int compare; | |
156 | ||
157 | bi1 = val1; | |
158 | bi2 = val2; | |
159 | ||
160 | compare = bgp_info_nexthop_cmp (bi1, bi2); | |
161 | ||
162 | if (!compare) | |
163 | compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote); | |
164 | ||
165 | return compare; | |
166 | } | |
167 | ||
168 | /* | |
169 | * bgp_mp_list_init | |
170 | * | |
171 | * Initialize the mp_list, which holds the list of multipaths | |
172 | * selected by bgp_best_selection | |
173 | */ | |
174 | void | |
175 | bgp_mp_list_init (struct list *mp_list) | |
176 | { | |
177 | assert (mp_list); | |
178 | memset (mp_list, 0, sizeof (struct list)); | |
179 | mp_list->cmp = bgp_info_mpath_cmp; | |
180 | } | |
181 | ||
182 | /* | |
183 | * bgp_mp_list_clear | |
184 | * | |
185 | * Clears all entries out of the mp_list | |
186 | */ | |
187 | void | |
188 | bgp_mp_list_clear (struct list *mp_list) | |
189 | { | |
190 | assert (mp_list); | |
191 | list_delete_all_node (mp_list); | |
192 | } | |
193 | ||
194 | /* | |
195 | * bgp_mp_list_add | |
196 | * | |
197 | * Adds a multipath entry to the mp_list | |
198 | */ | |
199 | void | |
200 | bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo) | |
201 | { | |
202 | assert (mp_list && mpinfo); | |
203 | listnode_add_sort (mp_list, mpinfo); | |
204 | } | |
de8d5dff JB |
205 | |
206 | /* | |
207 | * bgp_info_mpath_new | |
208 | * | |
209 | * Allocate and zero memory for a new bgp_info_mpath element | |
210 | */ | |
211 | static struct bgp_info_mpath * | |
212 | bgp_info_mpath_new (void) | |
213 | { | |
214 | struct bgp_info_mpath *new_mpath; | |
215 | new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath)); | |
216 | return new_mpath; | |
217 | } | |
218 | ||
219 | /* | |
220 | * bgp_info_mpath_free | |
221 | * | |
222 | * Release resources for a bgp_info_mpath element and zero out pointer | |
223 | */ | |
224 | void | |
225 | bgp_info_mpath_free (struct bgp_info_mpath **mpath) | |
226 | { | |
227 | if (mpath && *mpath) | |
228 | { | |
229 | XFREE (MTYPE_BGP_MPATH_INFO, *mpath); | |
230 | *mpath = NULL; | |
231 | } | |
232 | } | |
233 | ||
234 | /* | |
235 | * bgp_info_mpath_get | |
236 | * | |
237 | * Fetch the mpath element for the given bgp_info. Used for | |
238 | * doing lazy allocation. | |
239 | */ | |
240 | static struct bgp_info_mpath * | |
241 | bgp_info_mpath_get (struct bgp_info *binfo) | |
242 | { | |
243 | struct bgp_info_mpath *mpath; | |
244 | if (!binfo->mpath) | |
245 | { | |
246 | mpath = bgp_info_mpath_new(); | |
247 | if (!mpath) | |
248 | return NULL; | |
249 | binfo->mpath = mpath; | |
250 | mpath->mp_info = binfo; | |
251 | } | |
252 | return binfo->mpath; | |
253 | } | |
254 | ||
255 | /* | |
256 | * bgp_info_mpath_enqueue | |
257 | * | |
258 | * Enqueue a path onto the multipath list given the previous multipath | |
259 | * list entry | |
260 | */ | |
261 | static void | |
262 | bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo) | |
263 | { | |
264 | struct bgp_info_mpath *prev, *mpath; | |
265 | ||
266 | prev = bgp_info_mpath_get (prev_info); | |
267 | mpath = bgp_info_mpath_get (binfo); | |
268 | if (!prev || !mpath) | |
269 | return; | |
270 | ||
271 | mpath->mp_next = prev->mp_next; | |
272 | mpath->mp_prev = prev; | |
273 | if (prev->mp_next) | |
274 | prev->mp_next->mp_prev = mpath; | |
275 | prev->mp_next = mpath; | |
276 | ||
277 | SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH); | |
278 | } | |
279 | ||
280 | /* | |
281 | * bgp_info_mpath_dequeue | |
282 | * | |
283 | * Remove a path from the multipath list | |
284 | */ | |
285 | void | |
286 | bgp_info_mpath_dequeue (struct bgp_info *binfo) | |
287 | { | |
288 | struct bgp_info_mpath *mpath = binfo->mpath; | |
289 | if (!mpath) | |
290 | return; | |
291 | if (mpath->mp_prev) | |
292 | mpath->mp_prev->mp_next = mpath->mp_next; | |
293 | if (mpath->mp_next) | |
294 | mpath->mp_next->mp_prev = mpath->mp_prev; | |
295 | mpath->mp_next = mpath->mp_prev = NULL; | |
296 | UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH); | |
297 | } | |
298 | ||
299 | /* | |
300 | * bgp_info_mpath_next | |
301 | * | |
302 | * Given a bgp_info, return the next multipath entry | |
303 | */ | |
304 | struct bgp_info * | |
305 | bgp_info_mpath_next (struct bgp_info *binfo) | |
306 | { | |
307 | if (!binfo->mpath || !binfo->mpath->mp_next) | |
308 | return NULL; | |
309 | return binfo->mpath->mp_next->mp_info; | |
310 | } | |
311 | ||
312 | /* | |
313 | * bgp_info_mpath_first | |
314 | * | |
315 | * Given bestpath bgp_info, return the first multipath entry. | |
316 | */ | |
317 | struct bgp_info * | |
318 | bgp_info_mpath_first (struct bgp_info *binfo) | |
319 | { | |
320 | return bgp_info_mpath_next (binfo); | |
321 | } | |
322 | ||
323 | /* | |
324 | * bgp_info_mpath_count | |
325 | * | |
326 | * Given the bestpath bgp_info, return the number of multipath entries | |
327 | */ | |
328 | u_int32_t | |
329 | bgp_info_mpath_count (struct bgp_info *binfo) | |
330 | { | |
331 | if (!binfo->mpath) | |
332 | return 0; | |
333 | return binfo->mpath->mp_count; | |
334 | } | |
335 | ||
336 | /* | |
337 | * bgp_info_mpath_count_set | |
338 | * | |
339 | * Sets the count of multipaths into bestpath's mpath element | |
340 | */ | |
341 | static void | |
342 | bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count) | |
343 | { | |
344 | struct bgp_info_mpath *mpath; | |
345 | if (!count && !binfo->mpath) | |
346 | return; | |
347 | mpath = bgp_info_mpath_get (binfo); | |
348 | if (!mpath) | |
349 | return; | |
350 | mpath->mp_count = count; | |
351 | } | |
352 | ||
353 | /* | |
354 | * bgp_info_mpath_update | |
355 | * | |
356 | * Compare and sync up the multipath list with the mp_list generated by | |
357 | * bgp_best_selection | |
358 | */ | |
359 | void | |
360 | bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best, | |
361 | struct bgp_info *old_best, struct list *mp_list, | |
362 | struct bgp_maxpaths_cfg *mpath_cfg) | |
363 | { | |
364 | u_int16_t maxpaths, mpath_count, old_mpath_count; | |
365 | struct listnode *mp_node, *mp_next_node; | |
366 | struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath; | |
367 | int mpath_changed, debug; | |
368 | char pfx_buf[INET_ADDRSTRLEN], nh_buf[2][INET_ADDRSTRLEN]; | |
369 | ||
370 | mpath_changed = 0; | |
371 | maxpaths = BGP_DEFAULT_MAXPATHS; | |
372 | mpath_count = 0; | |
373 | cur_mpath = NULL; | |
374 | old_mpath_count = 0; | |
375 | prev_mpath = new_best; | |
376 | mp_node = listhead (mp_list); | |
377 | debug = BGP_DEBUG (events, EVENTS); | |
378 | ||
379 | if (debug) | |
380 | prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf)); | |
381 | ||
382 | if (new_best) | |
383 | { | |
384 | mpath_count++; | |
385 | if (new_best != old_best) | |
386 | bgp_info_mpath_dequeue (new_best); | |
387 | maxpaths = (peer_sort (new_best->peer) == BGP_PEER_IBGP) ? | |
388 | mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp; | |
389 | } | |
390 | ||
391 | if (old_best) | |
392 | { | |
393 | cur_mpath = bgp_info_mpath_first (old_best); | |
394 | old_mpath_count = bgp_info_mpath_count (old_best); | |
395 | bgp_info_mpath_count_set (old_best, 0); | |
396 | bgp_info_mpath_dequeue (old_best); | |
397 | } | |
398 | ||
399 | /* | |
400 | * We perform an ordered walk through both lists in parallel. | |
401 | * The reason for the ordered walk is that if there are paths | |
402 | * that were previously multipaths and are still multipaths, the walk | |
403 | * should encounter them in both lists at the same time. Otherwise | |
404 | * there will be paths that are in one list or another, and we | |
405 | * will deal with these separately. | |
406 | * | |
407 | * Note that new_best might be somewhere in the mp_list, so we need | |
408 | * to skip over it | |
409 | */ | |
410 | while (mp_node || cur_mpath) | |
411 | { | |
412 | /* | |
413 | * We can bail out of this loop if all existing paths on the | |
414 | * multipath list have been visited (for cleanup purposes) and | |
415 | * the maxpath requirement is fulfulled | |
416 | */ | |
417 | if (!cur_mpath && (mpath_count >= maxpaths)) | |
418 | break; | |
419 | ||
420 | mp_next_node = mp_node ? listnextnode (mp_node) : NULL; | |
421 | next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL; | |
422 | ||
423 | /* | |
424 | * If equal, the path was a multipath and is still a multipath. | |
425 | * Insert onto new multipath list if maxpaths allows. | |
426 | */ | |
427 | if (mp_node && (listgetdata (mp_node) == cur_mpath)) | |
428 | { | |
429 | list_delete_node (mp_list, mp_node); | |
430 | bgp_info_mpath_dequeue (cur_mpath); | |
431 | if ((mpath_count < maxpaths) && | |
432 | bgp_info_nexthop_cmp (prev_mpath, cur_mpath)) | |
433 | { | |
434 | bgp_info_mpath_enqueue (prev_mpath, cur_mpath); | |
435 | prev_mpath = cur_mpath; | |
436 | mpath_count++; | |
437 | } | |
438 | else | |
439 | { | |
440 | mpath_changed = 1; | |
441 | if (debug) | |
442 | zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf, | |
443 | inet_ntop (AF_INET, &cur_mpath->attr->nexthop, | |
444 | nh_buf[0], sizeof (nh_buf[0])), | |
445 | sockunion2str (cur_mpath->peer->su_remote, | |
446 | nh_buf[1], sizeof (nh_buf[1]))); | |
447 | } | |
448 | mp_node = mp_next_node; | |
449 | cur_mpath = next_mpath; | |
450 | continue; | |
451 | } | |
452 | ||
453 | if (cur_mpath && (!mp_node || | |
454 | (bgp_info_mpath_cmp (cur_mpath, | |
455 | listgetdata (mp_node)) < 0))) | |
456 | { | |
457 | /* | |
458 | * If here, we have an old multipath and either the mp_list | |
459 | * is finished or the next mp_node points to a later | |
460 | * multipath, so we need to purge this path from the | |
461 | * multipath list | |
462 | */ | |
463 | bgp_info_mpath_dequeue (cur_mpath); | |
464 | mpath_changed = 1; | |
465 | if (debug) | |
466 | zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf, | |
467 | inet_ntop (AF_INET, &cur_mpath->attr->nexthop, | |
468 | nh_buf[0], sizeof (nh_buf[0])), | |
469 | sockunion2str (cur_mpath->peer->su_remote, | |
470 | nh_buf[1], sizeof (nh_buf[1]))); | |
471 | cur_mpath = next_mpath; | |
472 | } | |
473 | else | |
474 | { | |
475 | /* | |
476 | * If here, we have a path on the mp_list that was not previously | |
477 | * a multipath (due to non-equivalance or maxpaths exceeded), | |
478 | * or the matching multipath is sorted later in the multipath | |
479 | * list. Before we enqueue the path on the new multipath list, | |
480 | * make sure its not on the old_best multipath list or referenced | |
481 | * via next_mpath: | |
482 | * - If next_mpath points to this new path, update next_mpath to | |
483 | * point to the multipath after this one | |
484 | * - Dequeue the path from the multipath list just to make sure | |
485 | */ | |
486 | new_mpath = listgetdata (mp_node); | |
487 | list_delete_node (mp_list, mp_node); | |
488 | if (new_mpath == next_mpath) | |
489 | next_mpath = bgp_info_mpath_next (new_mpath); | |
490 | bgp_info_mpath_dequeue (new_mpath); | |
491 | if ((mpath_count < maxpaths) && (new_mpath != new_best) && | |
492 | bgp_info_nexthop_cmp (prev_mpath, new_mpath)) | |
493 | { | |
494 | bgp_info_mpath_enqueue (prev_mpath, new_mpath); | |
495 | prev_mpath = new_mpath; | |
496 | mpath_changed = 1; | |
497 | mpath_count++; | |
498 | if (debug) | |
499 | zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf, | |
500 | inet_ntop (AF_INET, &new_mpath->attr->nexthop, | |
501 | nh_buf[0], sizeof (nh_buf[0])), | |
502 | sockunion2str (new_mpath->peer->su_remote, | |
503 | nh_buf[1], sizeof (nh_buf[1]))); | |
504 | } | |
505 | mp_node = mp_next_node; | |
506 | } | |
507 | } | |
508 | ||
509 | if (new_best) | |
510 | { | |
511 | bgp_info_mpath_count_set (new_best, mpath_count-1); | |
512 | if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count)) | |
513 | SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG); | |
514 | } | |
515 | } | |
6918e74b JB |
516 | |
517 | /* | |
518 | * bgp_mp_dmed_deselect | |
519 | * | |
520 | * Clean up multipath information for BGP_INFO_DMED_SELECTED path that | |
521 | * is not selected as best path | |
522 | */ | |
523 | void | |
524 | bgp_mp_dmed_deselect (struct bgp_info *dmed_best) | |
525 | { | |
526 | struct bgp_info *mpinfo, *mpnext; | |
527 | ||
528 | if (!dmed_best) | |
529 | return; | |
530 | ||
531 | for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext) | |
532 | { | |
533 | mpnext = bgp_info_mpath_next (mpinfo); | |
534 | bgp_info_mpath_dequeue (mpinfo); | |
535 | } | |
536 | ||
537 | bgp_info_mpath_count_set (dmed_best, 0); | |
538 | UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG); | |
539 | assert (bgp_info_mpath_first (dmed_best) == 0); | |
540 | } |