/* BGP Nexthop tracking
 * Copyright (C) 2013 Cumulus Networks, Inc.
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Zebra; see the file COPYING.  If not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_nht.h"
43 #include "bgpd/bgp_fsm.h"
44 #include "bgpd/bgp_zebra.h"
/* Zebra client handle shared with the rest of bgpd; defined outside this file. */
extern struct zclient *zclient;

/* Forward declarations for the file-local helpers defined further below. */
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
			       int is_bgp_static_route);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
				 int is_bgp_static_route);
static void evaluate_paths(struct bgp_nexthop_cache *bnc);
static int make_prefix(int afi, struct bgp_info *ri, struct prefix *p);
/* The third parameter was cut off in the listing; it is taken from the
 * definition of path_nh_map() later in this file. */
static void path_nh_map(struct bgp_info *path, struct bgp_nexthop_cache *bnc,
			int make);
58 bgp_isvalid_nexthop (struct bgp_nexthop_cache
*bnc
)
60 return (bgp_zebra_num_connects() == 0 ||
61 (bnc
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
)));
65 bgp_find_nexthop (struct bgp_info
*path
, int connected
)
67 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
72 if (connected
&& !(CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)))
75 return (bgp_isvalid_nexthop(bnc
));
79 bgp_unlink_nexthop_check (struct bgp_nexthop_cache
*bnc
)
81 if (LIST_EMPTY(&(bnc
->paths
)) && !bnc
->nht_info
)
83 if (BGP_DEBUG(nht
, NHT
))
85 char buf
[PREFIX2STR_BUFFER
];
86 zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
87 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
));
89 unregister_zebra_rnh(bnc
, CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
));
90 bnc
->node
->info
= NULL
;
91 bgp_unlock_node(bnc
->node
);
98 bgp_unlink_nexthop (struct bgp_info
*path
)
100 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
105 path_nh_map(path
, NULL
, 0);
107 bgp_unlink_nexthop_check (bnc
);
111 bgp_unlink_nexthop_by_peer (struct peer
*peer
)
115 struct bgp_nexthop_cache
*bnc
;
116 afi_t afi
= family2afi(peer
->su
.sa
.sa_family
);
118 if (! sockunion2hostprefix (&peer
->su
, &p
))
121 rn
= bgp_node_get (peer
->bgp
->nexthop_cache_table
[afi
], &p
);
128 /* cleanup the peer reference */
129 bnc
->nht_info
= NULL
;
131 bgp_unlink_nexthop_check (bnc
);
135 bgp_find_or_add_nexthop (struct bgp
*bgp
, afi_t afi
, struct bgp_info
*ri
,
136 struct peer
*peer
, int connected
)
139 struct bgp_nexthop_cache
*bnc
;
141 int is_bgp_static_route
= 0;
145 is_bgp_static_route
= ((ri
->type
== ZEBRA_ROUTE_BGP
) &&
146 (ri
->sub_type
== BGP_ROUTE_STATIC
)) ? 1 : 0;
148 /* Since Extended Next-hop Encoding (RFC5549) support, we want to derive
149 address-family from the next-hop. */
150 if (!is_bgp_static_route
)
151 afi
= BGP_ATTR_NEXTHOP_AFI_IP6(ri
->attr
) ? AFI_IP6
: AFI_IP
;
153 /* This will return TRUE if the global IPv6 NH is a link local addr */
154 if (make_prefix(afi
, ri
, &p
) < 0)
159 /* Don't register link local NH */
160 if (afi
== AFI_IP6
&& IN6_IS_ADDR_LINKLOCAL (&peer
->su
.sin6
.sin6_addr
))
163 if (! sockunion2hostprefix (&peer
->su
, &p
))
165 if (BGP_DEBUG(nht
, NHT
))
167 zlog_debug("%s: Attempting to register with unknown AFI %d (not %d or %d)",
168 __FUNCTION__
, afi
, AFI_IP
, AFI_IP6
);
176 if (is_bgp_static_route
)
177 rn
= bgp_node_get (bgp
->import_check_table
[afi
], &p
);
179 rn
= bgp_node_get (bgp
->nexthop_cache_table
[afi
], &p
);
188 if (BGP_DEBUG(nht
, NHT
))
190 char buf
[PREFIX2STR_BUFFER
];
192 zlog_debug("Allocated bnc %s peer %p",
193 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
), peer
);
198 bgp_unlock_node (rn
);
200 if (is_bgp_static_route
)
202 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
);
204 /* If we're toggling the type, re-register */
205 if ((bgp_flag_check(bgp
, BGP_FLAG_IMPORT_CHECK
)) &&
206 !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
208 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
209 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
210 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
212 else if ((!bgp_flag_check(bgp
, BGP_FLAG_IMPORT_CHECK
)) &&
213 CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
215 UNSET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
216 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
217 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
220 /* When nexthop is already known, but now requires 'connected' resolution,
221 * re-register it. The reverse scenario where the nexthop currently requires
222 * 'connected' resolution does not need a re-register (i.e., we treat
223 * 'connected-required' as an override) except in the scenario where this
224 * is actually a case of tracking a peer for connectivity (e.g., after
225 * disable connected-check).
226 * NOTE: We don't track the number of paths separately for 'connected-
227 * required' vs 'connected-not-required' as this change is not a common
230 else if (connected
&& ! CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
))
232 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
233 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
234 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
236 else if (peer
&& !connected
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
))
238 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
239 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
240 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
243 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
244 register_zebra_rnh(bnc
, is_bgp_static_route
);
246 if (ri
&& ri
->nexthop
!= bnc
)
248 /* Unlink from existing nexthop cache, if any. This will also free
249 * the nexthop cache entry, if appropriate.
251 bgp_unlink_nexthop (ri
);
253 path_nh_map(ri
, bnc
, 1); /* updates NHT ri list reference */
255 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
256 (bgp_info_extra_get(ri
))->igpmetric
= bnc
->metric
;
258 ri
->extra
->igpmetric
= 0;
261 bnc
->nht_info
= (void *)peer
; /* NHT peer reference */
263 return (bgp_isvalid_nexthop(bnc
));
267 bgp_delete_connected_nexthop (afi_t afi
, struct peer
*peer
)
270 struct bgp_nexthop_cache
*bnc
;
276 /* We don't register link local address for NHT */
277 if (afi
== AFI_IP6
&& IN6_IS_ADDR_LINKLOCAL (&peer
->su
.sin6
.sin6_addr
))
280 if (! sockunion2hostprefix (&peer
->su
, &p
))
283 rn
= bgp_node_lookup(peer
->bgp
->nexthop_cache_table
[family2afi(p
.family
)], &p
);
284 if (!rn
|| !rn
->info
)
286 if (BGP_DEBUG(nht
, NHT
))
287 zlog_debug("Cannot find connected NHT node for peer %s", peer
->host
);
289 bgp_unlock_node (rn
);
296 if (bnc
->nht_info
!= peer
)
298 if (BGP_DEBUG(nht
, NHT
))
299 zlog_debug("Connected NHT %p node for peer %s points to %p",
300 bnc
, peer
->host
, bnc
->nht_info
);
304 bnc
->nht_info
= NULL
;
306 if (LIST_EMPTY(&(bnc
->paths
)))
308 if (BGP_DEBUG(nht
, NHT
))
309 zlog_debug("Freeing connected NHT node %p for peer %s",
311 unregister_zebra_rnh(bnc
, 0);
312 bnc
->node
->info
= NULL
;
313 bgp_unlock_node(bnc
->node
);
319 bgp_parse_nexthop_update (int command
, vrf_id_t vrf_id
)
322 struct bgp_node
*rn
= NULL
;
323 struct bgp_nexthop_cache
*bnc
;
324 struct nexthop
*nexthop
;
325 struct nexthop
*oldnh
;
326 struct nexthop
*nhlist_head
= NULL
;
327 struct nexthop
*nhlist_tail
= NULL
;
334 bgp
= bgp_lookup_by_vrf_id (vrf_id
);
337 zlog_err("parse nexthop update: instance not found for vrf_id %d", vrf_id
);
343 memset(&p
, 0, sizeof(struct prefix
));
344 p
.family
= stream_getw(s
);
345 p
.prefixlen
= stream_getc(s
);
349 p
.u
.prefix4
.s_addr
= stream_get_ipv4 (s
);
352 stream_get(&p
.u
.prefix6
, s
, 16);
358 if (command
== ZEBRA_NEXTHOP_UPDATE
)
359 rn
= bgp_node_lookup(bgp
->nexthop_cache_table
[family2afi(p
.family
)], &p
);
360 else if (command
== ZEBRA_IMPORT_CHECK_UPDATE
)
361 rn
= bgp_node_lookup(bgp
->import_check_table
[family2afi(p
.family
)], &p
);
363 if (!rn
|| !rn
->info
)
365 if (BGP_DEBUG(nht
, NHT
))
367 char buf
[PREFIX2STR_BUFFER
];
368 prefix2str(&p
, buf
, sizeof(buf
));
369 zlog_debug("parse nexthop update(%s): rn not found", buf
);
372 bgp_unlock_node (rn
);
377 bgp_unlock_node (rn
);
378 bnc
->last_update
= bgp_clock();
379 bnc
->change_flags
= 0;
380 metric
= stream_getl (s
);
381 nexthop_num
= stream_getc (s
);
383 /* debug print the input */
384 if (BGP_DEBUG(nht
, NHT
))
386 char buf
[PREFIX2STR_BUFFER
];
387 prefix2str(&p
, buf
, sizeof (buf
));
388 zlog_debug("%d: NH update for %s - metric %d (cur %d) #nhops %d (cur %d)",
389 vrf_id
, buf
, metric
, bnc
->metric
, nexthop_num
, bnc
->nexthop_num
);
392 if (metric
!= bnc
->metric
)
393 bnc
->change_flags
|= BGP_NEXTHOP_METRIC_CHANGED
;
395 if(nexthop_num
!= bnc
->nexthop_num
)
396 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
400 /* notify bgp fsm if nbr ip goes from invalid->valid */
401 if (!bnc
->nexthop_num
)
402 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
404 bnc
->flags
|= BGP_NEXTHOP_VALID
;
405 bnc
->metric
= metric
;
406 bnc
->nexthop_num
= nexthop_num
;
408 for (i
= 0; i
< nexthop_num
; i
++)
410 nexthop
= nexthop_new();
411 nexthop
->type
= stream_getc (s
);
412 switch (nexthop
->type
)
414 case NEXTHOP_TYPE_IPV4
:
415 nexthop
->gate
.ipv4
.s_addr
= stream_get_ipv4 (s
);
417 case NEXTHOP_TYPE_IFINDEX
:
418 nexthop
->ifindex
= stream_getl (s
);
420 case NEXTHOP_TYPE_IPV4_IFINDEX
:
421 nexthop
->gate
.ipv4
.s_addr
= stream_get_ipv4 (s
);
422 nexthop
->ifindex
= stream_getl (s
);
424 case NEXTHOP_TYPE_IPV6
:
425 stream_get (&nexthop
->gate
.ipv6
, s
, 16);
427 case NEXTHOP_TYPE_IPV6_IFINDEX
:
428 stream_get (&nexthop
->gate
.ipv6
, s
, 16);
429 nexthop
->ifindex
= stream_getl (s
);
436 if (BGP_DEBUG(nht
, NHT
))
438 char buf
[NEXTHOP_STRLEN
];
439 zlog_debug(" nhop via %s",
440 nexthop2str (nexthop
, buf
, sizeof (buf
)));
445 nhlist_tail
->next
= nexthop
;
446 nhlist_tail
= nexthop
;
450 nhlist_tail
= nexthop
;
451 nhlist_head
= nexthop
;
454 /* No need to evaluate the nexthop if we have already determined
455 * that there has been a change.
457 if (bnc
->change_flags
& BGP_NEXTHOP_CHANGED
)
460 for (oldnh
= bnc
->nexthop
; oldnh
; oldnh
= oldnh
->next
)
461 if (nexthop_same_no_recurse(oldnh
, nexthop
))
465 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
467 bnc_nexthop_free(bnc
);
468 bnc
->nexthop
= nhlist_head
;
472 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
473 bnc
->nexthop_num
= nexthop_num
;
475 /* notify bgp fsm if nbr ip goes from valid->invalid */
476 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
478 bnc_nexthop_free(bnc
);
486 * make_prefix - make a prefix structure from the path (essentially
490 make_prefix (int afi
, struct bgp_info
*ri
, struct prefix
*p
)
493 int is_bgp_static
= ((ri
->type
== ZEBRA_ROUTE_BGP
) &&
494 (ri
->sub_type
== BGP_ROUTE_STATIC
)) ? 1 : 0;
496 memset (p
, 0, sizeof (struct prefix
));
503 p
->u
.prefix4
= ri
->net
->p
.u
.prefix4
;
504 p
->prefixlen
= ri
->net
->p
.prefixlen
;
508 p
->u
.prefix4
= ri
->attr
->nexthop
;
509 p
->prefixlen
= IPV4_MAX_BITLEN
;
514 /* We don't register link local NH */
515 if (ri
->attr
->extra
->mp_nexthop_len
!= BGP_ATTR_NHLEN_IPV6_GLOBAL
516 || IN6_IS_ADDR_LINKLOCAL (&ri
->attr
->extra
->mp_nexthop_global
))
519 p
->family
= AF_INET6
;
523 p
->u
.prefix6
= ri
->net
->p
.u
.prefix6
;
524 p
->prefixlen
= ri
->net
->p
.prefixlen
;
528 p
->u
.prefix6
= ri
->attr
->extra
->mp_nexthop_global
;
529 p
->prefixlen
= IPV6_MAX_BITLEN
;
534 if (BGP_DEBUG(nht
, NHT
))
536 zlog_debug("%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
537 __FUNCTION__
, afi
, AFI_IP
, AFI_IP6
);
545 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
548 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
549 * int command -- command to send to zebra
554 sendmsg_zebra_rnh (struct bgp_nexthop_cache
*bnc
, int command
)
561 if (!zclient
|| zclient
->sock
< 0)
564 /* Don't try to register if Zebra doesn't know of this instance. */
565 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc
->bgp
))
571 zclient_create_header (s
, command
, bnc
->bgp
->vrf_id
);
572 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
) ||
573 CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
578 stream_putw(s
, PREFIX_FAMILY(p
));
579 stream_putc(s
, p
->prefixlen
);
580 switch (PREFIX_FAMILY(p
))
583 stream_put_in_addr (s
, &p
->u
.prefix4
);
587 stream_put(s
, &(p
->u
.prefix6
), 16);
593 stream_putw_at (s
, 0, stream_get_endp (s
));
595 ret
= zclient_send_message(zclient
);
596 /* TBD: handle the failure */
598 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
600 if ((command
== ZEBRA_NEXTHOP_REGISTER
) ||
601 (command
== ZEBRA_IMPORT_ROUTE_REGISTER
))
602 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
603 else if ((command
== ZEBRA_NEXTHOP_UNREGISTER
) ||
604 (command
== ZEBRA_IMPORT_ROUTE_UNREGISTER
))
605 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
610 * register_zebra_rnh - register a NH/route with Zebra for notification
611 * when the route or the route to the nexthop changes.
613 * struct bgp_nexthop_cache *bnc
618 register_zebra_rnh (struct bgp_nexthop_cache
*bnc
, int is_bgp_import_route
)
620 /* Check if we have already registered */
621 if (bnc
->flags
& BGP_NEXTHOP_REGISTERED
)
623 if (is_bgp_import_route
)
624 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_REGISTER
);
626 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_REGISTER
);
630 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
632 * struct bgp_nexthop_cache *bnc
637 unregister_zebra_rnh (struct bgp_nexthop_cache
*bnc
, int is_bgp_import_route
)
639 /* Check if we have already registered */
640 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
643 if (is_bgp_import_route
)
644 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_UNREGISTER
);
646 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_UNREGISTER
);
650 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
652 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
657 evaluate_paths (struct bgp_nexthop_cache
*bnc
)
660 struct bgp_info
*path
;
661 struct bgp
*bgp
= bnc
->bgp
;
663 struct peer
*peer
= (struct peer
*)bnc
->nht_info
;
665 if (BGP_DEBUG(nht
, NHT
))
667 char buf
[PREFIX2STR_BUFFER
];
668 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
);
669 zlog_debug("NH update for %s - flags 0x%x chgflags 0x%x - evaluate paths",
670 buf
, bnc
->flags
, bnc
->change_flags
);
673 LIST_FOREACH(path
, &(bnc
->paths
), nh_thread
)
675 if (!(path
->type
== ZEBRA_ROUTE_BGP
&&
676 ((path
->sub_type
== BGP_ROUTE_NORMAL
) ||
677 (path
->sub_type
== BGP_ROUTE_STATIC
))))
681 afi
= family2afi(rn
->p
.family
);
683 /* Path becomes valid/invalid depending on whether the nexthop
684 * reachable/unreachable.
686 if ((CHECK_FLAG(path
->flags
, BGP_INFO_VALID
) ? 1 : 0) !=
687 (bgp_isvalid_nexthop(bnc
) ? 1 : 0))
689 if (CHECK_FLAG (path
->flags
, BGP_INFO_VALID
))
691 bgp_aggregate_decrement (bgp
, &rn
->p
, path
,
693 bgp_info_unset_flag (rn
, path
, BGP_INFO_VALID
);
697 bgp_info_set_flag (rn
, path
, BGP_INFO_VALID
);
698 bgp_aggregate_increment (bgp
, &rn
->p
, path
,
703 /* Copy the metric to the path. Will be used for bestpath computation */
704 if (bgp_isvalid_nexthop(bnc
) && bnc
->metric
)
705 (bgp_info_extra_get(path
))->igpmetric
= bnc
->metric
;
706 else if (path
->extra
)
707 path
->extra
->igpmetric
= 0;
709 if (CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_METRIC_CHANGED
) ||
710 CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
))
711 SET_FLAG(path
->flags
, BGP_INFO_IGP_CHANGED
);
713 bgp_process(bgp
, rn
, afi
, SAFI_UNICAST
);
716 if (peer
&& !CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
))
718 if (BGP_DEBUG(nht
, NHT
))
719 zlog_debug("%s: Updating peer (%s) status with NHT", __FUNCTION__
, peer
->host
);
720 bgp_fsm_nht_update(peer
, bgp_isvalid_nexthop(bnc
));
721 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
724 RESET_FLAG(bnc
->change_flags
);
728 * path_nh_map - make or break path-to-nexthop association.
730 * path - pointer to the path structure
731 * bnc - pointer to the nexthop structure
732 * make - if set, make the association. if unset, just break the existing
736 path_nh_map (struct bgp_info
*path
, struct bgp_nexthop_cache
*bnc
, int make
)
740 LIST_REMOVE(path
, nh_thread
);
741 path
->nexthop
->path_count
--;
742 path
->nexthop
= NULL
;
746 LIST_INSERT_HEAD(&(bnc
->paths
), path
, nh_thread
);
748 path
->nexthop
->path_count
++;