1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 #include "nexthop_group.h"
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
/* Zebra client handle; only declared (extern) in this file. */
extern struct zclient *zclient;

/*
 * Forward declarations for static helpers defined later in this file.
 * Parameter names match the definitions (is_bgp_import_route selects the
 * import-route register/unregister command instead of the nexthop one).
 */
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
			       int is_bgp_import_route);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
				 int is_bgp_import_route);
static void evaluate_paths(struct bgp_nexthop_cache *bnc);
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
59 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache
*bnc
)
61 return (bgp_zebra_num_connects() == 0
62 || (bnc
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
)));
65 static int bgp_isvalid_labeled_nexthop(struct bgp_nexthop_cache
*bnc
)
67 return (bgp_zebra_num_connects() == 0
68 || (bnc
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_LABELED_VALID
)));
71 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache
*bnc
)
73 if (LIST_EMPTY(&(bnc
->paths
)) && !bnc
->nht_info
) {
74 if (BGP_DEBUG(nht
, NHT
)) {
75 char buf
[PREFIX2STR_BUFFER
];
76 zlog_debug("%s: freeing bnc %s(%u)(%s)", __func__
,
77 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
),
78 bnc
->srte_color
, bnc
->bgp
->name_pretty
);
80 /* only unregister if this is the last nh for this prefix*/
81 if (!bnc_existing_for_prefix(bnc
))
83 bnc
, CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
));
88 void bgp_unlink_nexthop(struct bgp_path_info
*path
)
90 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
95 path_nh_map(path
, NULL
, false);
97 bgp_unlink_nexthop_check(bnc
);
100 void bgp_unlink_nexthop_by_peer(struct peer
*peer
)
103 struct bgp_nexthop_cache
*bnc
;
104 afi_t afi
= family2afi(peer
->su
.sa
.sa_family
);
106 if (!sockunion2hostprefix(&peer
->su
, &p
))
109 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[afi
], &p
, 0);
113 /* cleanup the peer reference */
114 bnc
->nht_info
= NULL
;
116 bgp_unlink_nexthop_check(bnc
);
120 * A route and its nexthop might belong to different VRFs. Therefore,
121 * we need both the bgp_route and bgp_nexthop pointers.
123 int bgp_find_or_add_nexthop(struct bgp
*bgp_route
, struct bgp
*bgp_nexthop
,
124 afi_t afi
, struct bgp_path_info
*pi
,
125 struct peer
*peer
, int connected
)
127 struct bgp_nexthop_cache_head
*tree
= NULL
;
128 struct bgp_nexthop_cache
*bnc
;
130 uint32_t srte_color
= 0;
131 int is_bgp_static_route
= 0;
134 is_bgp_static_route
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
135 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
139 /* Since Extended Next-hop Encoding (RFC5549) support, we want
141 address-family from the next-hop. */
142 if (!is_bgp_static_route
)
143 afi
= BGP_ATTR_NEXTHOP_AFI_IP6(pi
->attr
) ? AFI_IP6
146 /* Validation for the ipv4 mapped ipv6 nexthop. */
147 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
151 /* This will return true if the global IPv6 NH is a link local
153 if (make_prefix(afi
, pi
, &p
) < 0)
156 srte_color
= pi
->attr
->srte_color
;
158 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
159 if (BGP_DEBUG(nht
, NHT
)) {
161 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
162 __func__
, afi
, AFI_IP
, AFI_IP6
);
169 if (is_bgp_static_route
)
170 tree
= &bgp_nexthop
->import_check_table
[afi
];
172 tree
= &bgp_nexthop
->nexthop_cache_table
[afi
];
174 bnc
= bnc_find(tree
, &p
, srte_color
);
176 bnc
= bnc_new(tree
, &p
, srte_color
);
177 bnc
->bgp
= bgp_nexthop
;
178 if (BGP_DEBUG(nht
, NHT
)) {
179 char buf
[PREFIX2STR_BUFFER
];
181 zlog_debug("Allocated bnc %s(%u)(%s) peer %p",
182 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
),
183 bnc
->srte_color
, bnc
->bgp
->name_pretty
,
188 if (is_bgp_static_route
) {
189 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
);
191 /* If we're toggling the type, re-register */
192 if ((CHECK_FLAG(bgp_route
->flags
, BGP_FLAG_IMPORT_CHECK
))
193 && !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
)) {
194 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
195 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
196 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
197 } else if ((!CHECK_FLAG(bgp_route
->flags
,
198 BGP_FLAG_IMPORT_CHECK
))
199 && CHECK_FLAG(bnc
->flags
,
200 BGP_STATIC_ROUTE_EXACT_MATCH
)) {
201 UNSET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
202 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
203 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
206 /* When nexthop is already known, but now requires 'connected'
208 * re-register it. The reverse scenario where the nexthop currently
210 * 'connected' resolution does not need a re-register (i.e., we treat
211 * 'connected-required' as an override) except in the scenario where
213 * is actually a case of tracking a peer for connectivity (e.g., after
214 * disable connected-check).
215 * NOTE: We don't track the number of paths separately for 'connected-
216 * required' vs 'connected-not-required' as this change is not a common
219 else if (connected
&& !CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
220 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
221 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
222 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
223 } else if (peer
&& !connected
224 && CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
225 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
226 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
227 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
229 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
) {
230 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
231 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
232 } else if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
)
233 && !is_default_host_route(&bnc
->prefix
))
234 register_zebra_rnh(bnc
, is_bgp_static_route
);
236 if (pi
&& pi
->nexthop
!= bnc
) {
237 /* Unlink from existing nexthop cache, if any. This will also
239 * the nexthop cache entry, if appropriate.
241 bgp_unlink_nexthop(pi
);
243 /* updates NHT pi list reference */
244 path_nh_map(pi
, bnc
, true);
246 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
247 (bgp_path_info_extra_get(pi
))->igpmetric
= bnc
->metric
;
249 pi
->extra
->igpmetric
= 0;
251 bnc
->nht_info
= (void *)peer
; /* NHT peer reference */
254 * We are cheating here. Views have no associated underlying
255 * ability to detect nexthops. So when we have a view
256 * just tell everyone the nexthop is valid
258 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
261 return (bgp_isvalid_nexthop(bnc
));
264 void bgp_delete_connected_nexthop(afi_t afi
, struct peer
*peer
)
266 struct bgp_nexthop_cache
*bnc
;
272 if (!sockunion2hostprefix(&peer
->su
, &p
))
275 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[family2afi(p
.family
)],
278 if (BGP_DEBUG(nht
, NHT
))
280 "Cannot find connected NHT node for peer %s(%s)",
281 peer
->host
, peer
->bgp
->name_pretty
);
285 if (bnc
->nht_info
!= peer
) {
286 if (BGP_DEBUG(nht
, NHT
))
288 "Connected NHT %p node for peer %s(%s) points to %p",
289 bnc
, peer
->host
, bnc
->bgp
->name_pretty
,
294 bnc
->nht_info
= NULL
;
296 if (LIST_EMPTY(&(bnc
->paths
))) {
297 if (BGP_DEBUG(nht
, NHT
))
299 "Freeing connected NHT node %p for peer %s(%s)",
300 bnc
, peer
->host
, bnc
->bgp
->name_pretty
);
301 unregister_zebra_rnh(bnc
, 0);
306 static void bgp_process_nexthop_update(struct bgp_nexthop_cache
*bnc
,
307 struct zapi_route
*nhr
)
309 struct nexthop
*nexthop
;
310 struct nexthop
*oldnh
;
311 struct nexthop
*nhlist_head
= NULL
;
312 struct nexthop
*nhlist_tail
= NULL
;
315 bnc
->last_update
= bgp_clock();
316 bnc
->change_flags
= 0;
318 /* debug print the input */
319 if (BGP_DEBUG(nht
, NHT
))
321 "%s(%u): Rcvd NH update %pFX(%u) - metric %d/%d #nhops %d/%d flags 0x%x",
322 bnc
->bgp
->name_pretty
, bnc
->bgp
->vrf_id
, &nhr
->prefix
,
323 bnc
->srte_color
, nhr
->metric
, bnc
->metric
,
324 nhr
->nexthop_num
, bnc
->nexthop_num
, bnc
->flags
);
326 if (nhr
->metric
!= bnc
->metric
)
327 bnc
->change_flags
|= BGP_NEXTHOP_METRIC_CHANGED
;
329 if (nhr
->nexthop_num
!= bnc
->nexthop_num
)
330 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
332 if (nhr
->nexthop_num
) {
333 struct peer
*peer
= bnc
->nht_info
;
335 /* notify bgp fsm if nbr ip goes from invalid->valid */
336 if (!bnc
->nexthop_num
)
337 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
339 bnc
->flags
|= BGP_NEXTHOP_VALID
;
340 bnc
->metric
= nhr
->metric
;
341 bnc
->nexthop_num
= nhr
->nexthop_num
;
343 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
; /* check below */
345 for (i
= 0; i
< nhr
->nexthop_num
; i
++) {
348 nexthop
= nexthop_from_zapi_nexthop(&nhr
->nexthops
[i
]);
351 * Turn on RA for the v6 nexthops
352 * we receive from bgp. This is to allow us
353 * to work with v4 routing over v6 nexthops
355 if (peer
&& !peer
->ifp
356 && CHECK_FLAG(peer
->flags
,
357 PEER_FLAG_CAPABILITY_ENHE
)
358 && nhr
->prefix
.family
== AF_INET6
359 && nexthop
->type
!= NEXTHOP_TYPE_BLACKHOLE
) {
360 struct interface
*ifp
;
362 ifp
= if_lookup_by_index(nexthop
->ifindex
,
365 zclient_send_interface_radv_req(
366 zclient
, nexthop
->vrf_id
, ifp
,
368 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
370 /* There is at least one label-switched path */
371 if (nexthop
->nh_label
&&
372 nexthop
->nh_label
->num_labels
) {
374 bnc
->flags
|= BGP_NEXTHOP_LABELED_VALID
;
375 num_labels
= nexthop
->nh_label
->num_labels
;
378 if (BGP_DEBUG(nht
, NHT
)) {
379 char buf
[NEXTHOP_STRLEN
];
381 " nhop via %s (%d labels)",
382 nexthop2str(nexthop
, buf
, sizeof(buf
)),
387 nhlist_tail
->next
= nexthop
;
388 nhlist_tail
= nexthop
;
390 nhlist_tail
= nexthop
;
391 nhlist_head
= nexthop
;
394 /* No need to evaluate the nexthop if we have already
396 * that there has been a change.
398 if (bnc
->change_flags
& BGP_NEXTHOP_CHANGED
)
401 for (oldnh
= bnc
->nexthop
; oldnh
; oldnh
= oldnh
->next
)
402 if (nexthop_same(oldnh
, nexthop
))
406 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
408 bnc_nexthop_free(bnc
);
409 bnc
->nexthop
= nhlist_head
;
411 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
412 bnc
->nexthop_num
= nhr
->nexthop_num
;
414 /* notify bgp fsm if nbr ip goes from valid->invalid */
415 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
417 bnc_nexthop_free(bnc
);
424 void bgp_parse_nexthop_update(int command
, vrf_id_t vrf_id
)
426 struct bgp_nexthop_cache_head
*tree
= NULL
;
427 struct bgp_nexthop_cache
*bnc
;
429 struct zapi_route nhr
;
432 bgp
= bgp_lookup_by_vrf_id(vrf_id
);
436 "parse nexthop update: instance not found for vrf_id %u",
441 if (!zapi_nexthop_update_decode(zclient
->ibuf
, &nhr
)) {
442 zlog_err("%s[%s]: Failure to decode nexthop update",
443 __PRETTY_FUNCTION__
, bgp
->name_pretty
);
447 afi
= family2afi(nhr
.prefix
.family
);
448 if (command
== ZEBRA_NEXTHOP_UPDATE
)
449 tree
= &bgp
->nexthop_cache_table
[afi
];
450 else if (command
== ZEBRA_IMPORT_CHECK_UPDATE
)
451 tree
= &bgp
->import_check_table
[afi
];
453 bnc
= bnc_find(tree
, &nhr
.prefix
, nhr
.srte_color
);
455 if (BGP_DEBUG(nht
, NHT
))
457 "parse nexthop update(%pFX(%u)(%s)): bnc info not found",
458 &nhr
.prefix
, nhr
.srte_color
, bgp
->name_pretty
);
462 bgp_process_nexthop_update(bnc
, &nhr
);
465 * HACK: if any BGP route is dependent on an SR-policy that doesn't
466 * exist, zebra will never send NH updates relative to that policy. In
467 * that case, whenever we receive an update about a colorless NH, update
468 * the corresponding colorful NHs that share the same endpoint but that
469 * are inactive. This ugly hack should work around the problem at the
470 * cost of a performance penalty. Long term, what should be done is to
471 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
472 * which should provide a better infrastructure to solve this issue in
473 * a more efficient and elegant way.
475 if (nhr
.srte_color
== 0) {
476 struct bgp_nexthop_cache
*bnc_iter
;
478 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
480 if (!prefix_same(&bnc
->prefix
, &bnc_iter
->prefix
)
481 || bnc_iter
->srte_color
== 0
482 || CHECK_FLAG(bnc_iter
->flags
, BGP_NEXTHOP_VALID
))
485 bgp_process_nexthop_update(bnc_iter
, &nhr
);
491 * Cleanup nexthop registration and status information for BGP nexthops
492 * pertaining to this VRF. This is invoked upon VRF deletion.
494 void bgp_cleanup_nexthops(struct bgp
*bgp
)
496 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
497 struct bgp_nexthop_cache
*bnc
;
499 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
501 /* Clear relevant flags. */
502 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
503 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
504 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
510 * make_prefix - make a prefix structure from the path (essentially
513 static int make_prefix(int afi
, struct bgp_path_info
*pi
, struct prefix
*p
)
516 int is_bgp_static
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
517 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
520 struct bgp_dest
*net
= pi
->net
;
521 const struct prefix
*p_orig
= bgp_dest_get_prefix(net
);
524 if (p_orig
->family
== AF_FLOWSPEC
) {
527 return bgp_flowspec_get_first_nh(pi
->peer
->bgp
,
530 memset(p
, 0, sizeof(struct prefix
));
535 p
->u
.prefix4
= p_orig
->u
.prefix4
;
536 p
->prefixlen
= p_orig
->prefixlen
;
538 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
539 ipv4_mapped_ipv6_to_ipv4(
540 &pi
->attr
->mp_nexthop_global
, &ipv4
);
542 p
->prefixlen
= IPV4_MAX_BITLEN
;
544 p
->u
.prefix4
= pi
->attr
->nexthop
;
545 p
->prefixlen
= IPV4_MAX_BITLEN
;
550 p
->family
= AF_INET6
;
553 p
->u
.prefix6
= p_orig
->u
.prefix6
;
554 p
->prefixlen
= p_orig
->prefixlen
;
556 /* If we receive MP_REACH nexthop with ::(LL)
557 * or LL(LL), use LL address as nexthop cache.
559 if (pi
->attr
->mp_nexthop_len
560 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
561 && (IN6_IS_ADDR_UNSPECIFIED(
562 &pi
->attr
->mp_nexthop_global
)
563 || IN6_IS_ADDR_LINKLOCAL(
564 &pi
->attr
->mp_nexthop_global
)))
565 p
->u
.prefix6
= pi
->attr
->mp_nexthop_local
;
567 p
->u
.prefix6
= pi
->attr
->mp_nexthop_global
;
568 p
->prefixlen
= IPV6_MAX_BITLEN
;
572 if (BGP_DEBUG(nht
, NHT
)) {
574 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
575 __func__
, afi
, AFI_IP
, AFI_IP6
);
583 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
586 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
587 * int command -- command to send to zebra
591 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache
*bnc
, int command
)
593 bool exact_match
= false;
599 /* Don't try to register if Zebra doesn't know of this instance. */
600 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc
->bgp
)) {
601 if (BGP_DEBUG(zebra
, ZEBRA
))
603 "%s: No zebra instance to talk to, not installing NHT entry",
608 if (!bgp_zebra_num_connects()) {
609 if (BGP_DEBUG(zebra
, ZEBRA
))
611 "%s: We have not connected yet, cannot send nexthops",
614 if ((command
== ZEBRA_NEXTHOP_REGISTER
615 || command
== ZEBRA_IMPORT_ROUTE_REGISTER
)
616 && (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)
617 || CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
)))
620 if (BGP_DEBUG(zebra
, ZEBRA
))
621 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__
,
622 zserv_command_string(command
), &bnc
->prefix
,
623 bnc
->bgp
->name_pretty
);
625 ret
= zclient_send_rnh(zclient
, command
, &bnc
->prefix
, exact_match
,
627 /* TBD: handle the failure */
628 if (ret
== ZCLIENT_SEND_FAILURE
)
629 flog_warn(EC_BGP_ZEBRA_SEND
,
630 "sendmsg_nexthop: zclient_send_message() failed");
632 if ((command
== ZEBRA_NEXTHOP_REGISTER
)
633 || (command
== ZEBRA_IMPORT_ROUTE_REGISTER
))
634 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
635 else if ((command
== ZEBRA_NEXTHOP_UNREGISTER
)
636 || (command
== ZEBRA_IMPORT_ROUTE_UNREGISTER
))
637 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
642 * register_zebra_rnh - register a NH/route with Zebra for notification
643 * when the route or the route to the nexthop changes.
645 * struct bgp_nexthop_cache *bnc
649 static void register_zebra_rnh(struct bgp_nexthop_cache
*bnc
,
650 int is_bgp_import_route
)
652 /* Check if we have already registered */
653 if (bnc
->flags
& BGP_NEXTHOP_REGISTERED
)
655 if (is_bgp_import_route
)
656 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_REGISTER
);
658 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_REGISTER
);
662 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
664 * struct bgp_nexthop_cache *bnc
668 static void unregister_zebra_rnh(struct bgp_nexthop_cache
*bnc
,
669 int is_bgp_import_route
)
671 /* Check if we have already registered */
672 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
675 if (is_bgp_import_route
)
676 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_UNREGISTER
);
678 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_UNREGISTER
);
682 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
684 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
688 static void evaluate_paths(struct bgp_nexthop_cache
*bnc
)
690 struct bgp_dest
*dest
;
691 struct bgp_path_info
*path
;
693 struct peer
*peer
= (struct peer
*)bnc
->nht_info
;
694 struct bgp_table
*table
;
696 struct bgp
*bgp_path
;
697 const struct prefix
*p
;
699 if (BGP_DEBUG(nht
, NHT
)) {
700 char buf
[PREFIX2STR_BUFFER
];
701 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
);
703 "NH update for %s(%u)(%s) - flags 0x%x chgflags 0x%x - evaluate paths",
704 buf
, bnc
->srte_color
, bnc
->bgp
->name_pretty
, bnc
->flags
,
708 LIST_FOREACH (path
, &(bnc
->paths
), nh_thread
) {
709 if (!(path
->type
== ZEBRA_ROUTE_BGP
710 && ((path
->sub_type
== BGP_ROUTE_NORMAL
)
711 || (path
->sub_type
== BGP_ROUTE_STATIC
)
712 || (path
->sub_type
== BGP_ROUTE_IMPORTED
))))
716 assert(dest
&& bgp_dest_table(dest
));
717 p
= bgp_dest_get_prefix(dest
);
718 afi
= family2afi(p
->family
);
719 table
= bgp_dest_table(dest
);
723 * handle routes from other VRFs (they can have a
724 * nexthop in THIS VRF). bgp_path is the bgp instance
725 * that owns the route referencing this nexthop.
727 bgp_path
= table
->bgp
;
730 * Path becomes valid/invalid depending on whether the nexthop
731 * reachable/unreachable.
733 * In case of unicast routes that were imported from vpn
734 * and that have labels, they are valid only if there are
735 * nexthops with labels
738 bool bnc_is_valid_nexthop
= false;
739 bool path_valid
= false;
741 if (safi
== SAFI_UNICAST
&&
742 path
->sub_type
== BGP_ROUTE_IMPORTED
&&
744 path
->extra
->num_labels
) {
746 bnc_is_valid_nexthop
=
747 bgp_isvalid_labeled_nexthop(bnc
) ? true : false;
749 if (bgp_update_martian_nexthop(
750 bnc
->bgp
, afi
, safi
, path
->type
,
751 path
->sub_type
, path
->attr
, dest
)) {
752 if (BGP_DEBUG(nht
, NHT
))
754 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
755 __func__
, dest
, bgp_path
->name
);
757 bnc_is_valid_nexthop
=
758 bgp_isvalid_nexthop(bnc
) ? true : false;
761 if (BGP_DEBUG(nht
, NHT
)) {
762 char buf1
[RD_ADDRSTRLEN
];
765 prefix_rd2str((struct prefix_rd
*)bgp_dest_get_prefix(dest
->pdest
),
768 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
769 afi
, safi
, dest
, buf1
,
770 bgp_path
->name_pretty
, path
->flags
);
773 "... eval path %d/%d %pBD %s flags 0x%x",
774 afi
, safi
, dest
, bgp_path
->name_pretty
,
778 /* Skip paths marked for removal or as history. */
779 if (CHECK_FLAG(path
->flags
, BGP_PATH_REMOVED
)
780 || CHECK_FLAG(path
->flags
, BGP_PATH_HISTORY
))
783 /* Copy the metric to the path. Will be used for bestpath
785 if (bgp_isvalid_nexthop(bnc
) && bnc
->metric
)
786 (bgp_path_info_extra_get(path
))->igpmetric
=
788 else if (path
->extra
)
789 path
->extra
->igpmetric
= 0;
791 if (CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_METRIC_CHANGED
)
792 || CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
)
793 || path
->attr
->srte_color
!= 0)
794 SET_FLAG(path
->flags
, BGP_PATH_IGP_CHANGED
);
796 path_valid
= !!CHECK_FLAG(path
->flags
, BGP_PATH_VALID
);
797 if (path_valid
!= bnc_is_valid_nexthop
) {
799 /* No longer valid, clear flag; also for EVPN
800 * routes, unimport from VRFs if needed.
802 bgp_aggregate_decrement(bgp_path
, p
, path
, afi
,
804 bgp_path_info_unset_flag(dest
, path
,
806 if (safi
== SAFI_EVPN
&&
807 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
808 bgp_evpn_unimport_route(bgp_path
,
809 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
811 /* Path becomes valid, set flag; also for EVPN
812 * routes, import from VRFs if needed.
814 bgp_path_info_set_flag(dest
, path
,
816 bgp_aggregate_increment(bgp_path
, p
, path
, afi
,
818 if (safi
== SAFI_EVPN
&&
819 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
820 bgp_evpn_import_route(bgp_path
,
821 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
825 bgp_process(bgp_path
, dest
, afi
, safi
);
829 int valid_nexthops
= bgp_isvalid_nexthop(bnc
);
832 peer
->last_reset
= PEER_DOWN_WAITING_OPEN
;
834 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
836 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
)) {
837 if (BGP_DEBUG(nht
, NHT
))
839 "%s: Updating peer (%s(%s)) status with NHT",
840 __func__
, peer
->host
,
841 peer
->bgp
->name_pretty
);
842 bgp_fsm_nht_update(peer
, !!valid_nexthops
);
843 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
847 RESET_FLAG(bnc
->change_flags
);
851 * path_nh_map - make or break path-to-nexthop association.
853 * path - pointer to the path structure
854 * bnc - pointer to the nexthop structure
855 * make - if set, make the association. if unset, just break the existing
858 void path_nh_map(struct bgp_path_info
*path
, struct bgp_nexthop_cache
*bnc
,
862 LIST_REMOVE(path
, nh_thread
);
863 path
->nexthop
->path_count
--;
864 path
->nexthop
= NULL
;
867 LIST_INSERT_HEAD(&(bnc
->paths
), path
, nh_thread
);
869 path
->nexthop
->path_count
++;
874 * This function is called to register nexthops to zebra
875 * as that we may have tried to install the nexthops
876 * before we actually have a zebra connection
878 void bgp_nht_register_nexthops(struct bgp
*bgp
)
880 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
881 struct bgp_nexthop_cache
*bnc
;
883 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
885 register_zebra_rnh(bnc
, 0);
890 void bgp_nht_reg_enhe_cap_intfs(struct peer
*peer
)
893 struct bgp_nexthop_cache
*bnc
;
894 struct nexthop
*nhop
;
895 struct interface
*ifp
;
902 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
903 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
904 __func__
, peer
->host
);
908 if (p
.family
!= AF_INET6
)
911 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0);
915 if (peer
!= bnc
->nht_info
)
918 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
919 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
924 zclient_send_interface_radv_req(zclient
,
927 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
931 void bgp_nht_dereg_enhe_cap_intfs(struct peer
*peer
)
934 struct bgp_nexthop_cache
*bnc
;
935 struct nexthop
*nhop
;
936 struct interface
*ifp
;
944 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
945 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
946 __func__
, peer
->host
);
950 if (p
.family
!= AF_INET6
)
953 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0);
957 if (peer
!= bnc
->nht_info
)
960 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
961 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
966 zclient_send_interface_radv_req(zclient
, nhop
->vrf_id
, ifp
, 0,
971 /****************************************************************************
972 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
973 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
974 * left to the application using it.
975 * PS: Currently EVPN host routes are the only app using L3 NHG for fast
976 * failover of remote ES links.
977 ***************************************************************************/
978 static bitfield_t bgp_nh_id_bitmap
;
979 static uint32_t bgp_l3nhg_start
;
/*
 * XXX - currently we do nothing on the callbacks.
 *
 * The four functions below are the hooks handed to nexthop_group_init();
 * each one is intentionally empty.
 */

/* Hook passed as the first ("add") argument of nexthop_group_init(). */
static void bgp_l3nhg_add_cb(const char *name)
{
}

/* Hook passed as the second ("add nexthop") argument. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* Hook passed as the third ("delete nexthop") argument. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* Hook passed as the fourth ("delete") argument. */
static void bgp_l3nhg_del_cb(const char *name)
{
}
997 static void bgp_l3nhg_zebra_init(void)
999 static bool bgp_l3nhg_zebra_inited
;
1000 if (bgp_l3nhg_zebra_inited
)
1003 bgp_l3nhg_zebra_inited
= true;
1004 bgp_l3nhg_start
= zclient_get_nhg_start(ZEBRA_ROUTE_BGP
);
1005 nexthop_group_init(bgp_l3nhg_add_cb
, bgp_l3nhg_add_nexthop_cb
,
1006 bgp_l3nhg_del_nexthop_cb
, bgp_l3nhg_del_cb
);
1010 #define min(A, B) ((A) < (B) ? (A) : (B))
1011 void bgp_l3nhg_init(void)
1015 id_max
= min(ZEBRA_NHG_PROTO_SPACING
- 1, 16 * 1024);
1016 bf_init(bgp_nh_id_bitmap
, id_max
);
1017 bf_assign_zero_index(bgp_nh_id_bitmap
);
1019 if (BGP_DEBUG(nht
, NHT
) || BGP_DEBUG(evpn_mh
, EVPN_MH_ES
))
1020 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start
+ 1,
1021 bgp_l3nhg_start
+ id_max
);
1024 void bgp_l3nhg_finish(void)
1026 bf_free(bgp_nh_id_bitmap
);
1029 uint32_t bgp_l3nhg_id_alloc(void)
1031 uint32_t nhg_id
= 0;
1033 bgp_l3nhg_zebra_init();
1034 bf_assign_index(bgp_nh_id_bitmap
, nhg_id
);
1036 nhg_id
+= bgp_l3nhg_start
;
1041 void bgp_l3nhg_id_free(uint32_t nhg_id
)
1043 if (!nhg_id
|| (nhg_id
<= bgp_l3nhg_start
))
1046 nhg_id
-= bgp_l3nhg_start
;
1048 bf_release_index(bgp_nh_id_bitmap
, nhg_id
);