1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* BGP Nexthop tracking
3 * Copyright (C) 2013 Cumulus Networks, Inc.
19 #include "nexthop_group.h"
21 #include "bgpd/bgpd.h"
22 #include "bgpd/bgp_table.h"
23 #include "bgpd/bgp_route.h"
24 #include "bgpd/bgp_attr.h"
25 #include "bgpd/bgp_nexthop.h"
26 #include "bgpd/bgp_debug.h"
27 #include "bgpd/bgp_errors.h"
28 #include "bgpd/bgp_nht.h"
29 #include "bgpd/bgp_fsm.h"
30 #include "bgpd/bgp_zebra.h"
31 #include "bgpd/bgp_flowspec_util.h"
32 #include "bgpd/bgp_evpn.h"
33 #include "bgpd/bgp_rd.h"
34 #include "bgpd/bgp_mplsvpn.h"
36 extern struct zclient
*zclient
;
38 static void register_zebra_rnh(struct bgp_nexthop_cache
*bnc
);
39 static void unregister_zebra_rnh(struct bgp_nexthop_cache
*bnc
);
40 static int make_prefix(int afi
, struct bgp_path_info
*pi
, struct prefix
*p
);
41 static void bgp_nht_ifp_initial(struct event
*thread
);
/*
 * Decide whether a nexthop cache entry may be treated as usable.
 *
 * The expression evaluates true when bgp_zebra_num_connects() reports 0,
 * or when bnc is non-NULL, has BGP_NEXTHOP_VALID set in its flags and
 * has nexthop_num greater than zero. A NULL bnc is tolerated: it is
 * checked before any member access.
 */
43 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache
*bnc
)
45 return (bgp_zebra_num_connects() == 0
46 || (bnc
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
)
47 && bnc
->nexthop_num
> 0));
50 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache
*bnc
,
51 struct bgp_path_info
*path
)
53 struct interface
*ifp
= NULL
;
54 struct nexthop
*nexthop
;
55 struct bgp_interface
*iifp
;
58 if (!path
->extra
|| !path
->extra
->peer_orig
)
61 peer
= path
->extra
->peer_orig
;
63 /* only connected ebgp peers are valid */
64 if (peer
->sort
!= BGP_PEER_EBGP
|| peer
->ttl
!= BGP_DEFAULT_TTL
||
65 CHECK_FLAG(peer
->flags
, PEER_FLAG_DISABLE_CONNECTED_CHECK
) ||
66 CHECK_FLAG(peer
->bgp
->flags
, BGP_FLAG_DISABLE_NH_CONNECTED_CHK
))
69 for (nexthop
= bnc
->nexthop
; nexthop
; nexthop
= nexthop
->next
) {
70 if (nexthop
->type
== NEXTHOP_TYPE_IFINDEX
||
71 nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
||
72 nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
73 ifp
= if_lookup_by_index(
74 bnc
->ifindex
? bnc
->ifindex
: nexthop
->ifindex
,
80 if (CHECK_FLAG(iifp
->flags
, BGP_INTERFACE_MPLS_BGP_FORWARDING
))
86 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache
*bnc
,
87 struct bgp_path_info
*path
)
89 struct interface
*ifp
= NULL
;
90 struct nexthop
*nexthop
;
92 for (nexthop
= bnc
->nexthop
; nexthop
; nexthop
= nexthop
->next
) {
93 if (nexthop
->type
!= NEXTHOP_TYPE_BLACKHOLE
) {
94 ifp
= if_lookup_by_index(
95 bnc
->ifindex
? bnc
->ifindex
: nexthop
->ifindex
,
97 if (ifp
&& (ifp
->ll_type
== ZEBRA_LLT_IPGRE
||
98 ifp
->ll_type
== ZEBRA_LLT_IP6GRE
))
105 if (CHECK_FLAG(path
->attr
->rmap_change_flags
,
106 BATTR_RMAP_L3VPN_ACCEPT_GRE
))
/*
 * Validity check used for paths that carry labels.
 *
 * Evaluates true when bgp_zebra_num_connects() reports 0. Otherwise bnc
 * must be non-NULL with nexthop_num > 0, and at least one of the
 * following must hold (tested in this order, short-circuiting):
 *   - path has BGP_PATH_ACCEPT_OWN set,
 *   - bnc has BGP_NEXTHOP_LABELED_VALID set,
 *   - bnc->bgp->srv6_enabled is non-zero,
 *   - bgp_isvalid_nexthop_for_ebgp(bnc, path) succeeds,
 *   - bgp_isvalid_nexthop_for_mplsovergre(bnc, path) succeeds.
 *
 * path is dereferenced without a NULL check, so callers must pass a
 * valid path whenever the zebra-connection shortcut does not apply.
 */
112 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache
*bnc
,
113 struct bgp_path_info
*path
)
116 * - In the case of MPLS-VPN, the label is learned from LDP or other
117 * protocols, and nexthop tracking is enabled for the label.
118 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
119 * - In the case of SRv6-VPN, we need to track the reachability to the
120 * SID (in other words, IPv6 address). As in MPLS, we need to record
121 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
122 * currently not implemented, and this function assumes that all
123 * Transit routes for SRv6-VPN are valid.
124 * - Otherwise check for mpls-gre acceptance
126 return (bgp_zebra_num_connects() == 0 ||
127 (bnc
&& (bnc
->nexthop_num
> 0 &&
128 (CHECK_FLAG(path
->flags
, BGP_PATH_ACCEPT_OWN
) ||
129 CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_LABELED_VALID
) ||
130 bnc
->bgp
->srv6_enabled
||
131 bgp_isvalid_nexthop_for_ebgp(bnc
, path
) ||
132 bgp_isvalid_nexthop_for_mplsovergre(bnc
, path
)))));
135 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache
*bnc
)
137 if (LIST_EMPTY(&(bnc
->paths
)) && !bnc
->nht_info
) {
138 if (BGP_DEBUG(nht
, NHT
))
139 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__
,
140 &bnc
->prefix
, bnc
->ifindex
, bnc
->srte_color
,
141 bnc
->bgp
->name_pretty
);
142 /* only unregister if this is the last nh for this prefix*/
143 if (!bnc_existing_for_prefix(bnc
))
144 unregister_zebra_rnh(bnc
);
149 void bgp_unlink_nexthop(struct bgp_path_info
*path
)
151 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
153 bgp_mplsvpn_path_nh_label_unlink(path
);
158 path_nh_map(path
, NULL
, false);
160 bgp_unlink_nexthop_check(bnc
);
163 void bgp_replace_nexthop_by_peer(struct peer
*from
, struct peer
*to
)
167 struct bgp_nexthop_cache
*bncp
, *bnct
;
169 ifindex_t ifindex
= 0;
171 if (!sockunion2hostprefix(&from
->su
, &pp
))
175 * Gather the ifindex for if up/down events to be
176 * tagged into this fun
178 if (from
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&from
->su
.sin6
.sin6_addr
))
179 ifindex
= from
->su
.sin6
.sin6_scope_id
;
181 afi
= family2afi(pp
.family
);
182 bncp
= bnc_find(&from
->bgp
->nexthop_cache_table
[afi
], &pp
, 0, ifindex
);
184 if (!sockunion2hostprefix(&to
->su
, &pt
))
188 * Gather the ifindex for if up/down events to be
189 * tagged into this fun
192 if (to
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&to
->su
.sin6
.sin6_addr
))
193 ifindex
= to
->su
.sin6
.sin6_scope_id
;
194 bnct
= bnc_find(&to
->bgp
->nexthop_cache_table
[afi
], &pt
, 0, ifindex
);
204 * Returns the bnc whose bnc->nht_info matches the LL peer by
205 * looping through the IPv6 nexthop table
207 static struct bgp_nexthop_cache
*
208 bgp_find_ipv6_nexthop_matching_peer(struct peer
*peer
)
210 struct bgp_nexthop_cache
*bnc
;
212 frr_each (bgp_nexthop_cache
, &peer
->bgp
->nexthop_cache_table
[AFI_IP6
],
214 if (bnc
->nht_info
== peer
) {
215 if (BGP_DEBUG(nht
, NHT
)) {
217 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
218 &bnc
->prefix
, bnc
->ifindex
,
219 bnc
->srte_color
, bnc
, peer
->host
,
220 peer
->bgp
->name_pretty
, peer
);
226 if (BGP_DEBUG(nht
, NHT
))
228 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
229 peer
->host
, peer
->bgp
->name_pretty
, peer
);
234 void bgp_unlink_nexthop_by_peer(struct peer
*peer
)
237 struct bgp_nexthop_cache
*bnc
;
238 afi_t afi
= family2afi(peer
->su
.sa
.sa_family
);
239 ifindex_t ifindex
= 0;
241 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
243 * In scenarios where unnumbered BGP session is brought
244 * down by shutting down the interface before unconfiguring
245 * the BGP neighbor, neighbor information in peer->su.sa
246 * will be cleared when the interface is shutdown. So
247 * during the deletion of unnumbered bgp peer, above check
248 * will return true. Therefore, in this case, BGP needs to
249 * find the bnc whose bnc->nht_info matches the
250 * peer being deleted and free it.
252 bnc
= bgp_find_ipv6_nexthop_matching_peer(peer
);
255 * Gather the ifindex for if up/down events to be
256 * tagged into this fun
258 if (afi
== AFI_IP6
&&
259 IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
260 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
261 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[afi
], &p
, 0,
268 /* cleanup the peer reference */
269 bnc
->nht_info
= NULL
;
271 bgp_unlink_nexthop_check(bnc
);
275 * A route and its nexthop might belong to different VRFs. Therefore,
276 * we need both the bgp_route and bgp_nexthop pointers.
278 int bgp_find_or_add_nexthop(struct bgp
*bgp_route
, struct bgp
*bgp_nexthop
,
279 afi_t afi
, safi_t safi
, struct bgp_path_info
*pi
,
280 struct peer
*peer
, int connected
,
281 const struct prefix
*orig_prefix
)
283 struct bgp_nexthop_cache_head
*tree
= NULL
;
284 struct bgp_nexthop_cache
*bnc
;
285 struct bgp_path_info
*bpi_ultimate
;
287 uint32_t srte_color
= 0;
288 int is_bgp_static_route
= 0;
289 ifindex_t ifindex
= 0;
292 is_bgp_static_route
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
293 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
297 /* Since Extended Next-hop Encoding (RFC5549) support, we want
299 address-family from the next-hop. */
300 if (!is_bgp_static_route
)
301 afi
= BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi
->attr
) ? AFI_IP6
304 /* Validation for the ipv4 mapped ipv6 nexthop. */
305 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
309 /* This will return true if the global IPv6 NH is a link local
311 if (make_prefix(afi
, pi
, &p
) < 0)
314 if (!is_bgp_static_route
&& orig_prefix
315 && prefix_same(&p
, orig_prefix
)) {
316 if (BGP_DEBUG(nht
, NHT
)) {
318 "%s(%pFX): prefix loops through itself",
324 srte_color
= pi
->attr
->srte_color
;
327 * Gather the ifindex for if up/down events to be
328 * tagged into this fun
330 if (afi
== AFI_IP6
&& peer
->conf_if
&&
331 IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
)) {
332 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
334 if (BGP_DEBUG(nht
, NHT
)) {
336 "%s: Unable to locate ifindex, waiting till we have one",
343 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
344 if (BGP_DEBUG(nht
, NHT
)) {
346 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
347 __func__
, afi
, AFI_IP
, AFI_IP6
);
354 if (is_bgp_static_route
)
355 tree
= &bgp_nexthop
->import_check_table
[afi
];
357 tree
= &bgp_nexthop
->nexthop_cache_table
[afi
];
359 bnc
= bnc_find(tree
, &p
, srte_color
, ifindex
);
361 bnc
= bnc_new(tree
, &p
, srte_color
, ifindex
);
362 bnc
->bgp
= bgp_nexthop
;
363 if (BGP_DEBUG(nht
, NHT
))
364 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
365 &bnc
->prefix
, bnc
->ifindex
, bnc
->srte_color
,
366 bnc
->bgp
->name_pretty
, peer
);
368 if (BGP_DEBUG(nht
, NHT
))
370 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
371 &bnc
->prefix
, bnc
->ifindex
,
372 bnc
->bgp
->name_pretty
, bnc
->flags
, bnc
->ifindex
,
373 bnc
->path_count
, bnc
->nht_info
);
376 if (pi
&& is_route_parent_evpn(pi
))
377 bnc
->is_evpn_gwip_nexthop
= true;
379 if (is_bgp_static_route
) {
380 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
);
382 /* If we're toggling the type, re-register */
383 if ((CHECK_FLAG(bgp_route
->flags
, BGP_FLAG_IMPORT_CHECK
))
384 && !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
)) {
385 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
386 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
387 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
388 } else if ((!CHECK_FLAG(bgp_route
->flags
,
389 BGP_FLAG_IMPORT_CHECK
))
390 && CHECK_FLAG(bnc
->flags
,
391 BGP_STATIC_ROUTE_EXACT_MATCH
)) {
392 UNSET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
393 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
394 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
397 /* When nexthop is already known, but now requires 'connected'
399 * re-register it. The reverse scenario where the nexthop currently
401 * 'connected' resolution does not need a re-register (i.e., we treat
402 * 'connected-required' as an override) except in the scenario where
404 * is actually a case of tracking a peer for connectivity (e.g., after
405 * disable connected-check).
406 * NOTE: We don't track the number of paths separately for 'connected-
407 * required' vs 'connected-not-required' as this change is not a common
410 else if (connected
&& !CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
411 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
412 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
413 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
414 } else if (peer
&& !connected
415 && CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
416 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
417 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
418 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
420 if (peer
&& (bnc
->ifindex
!= ifindex
)) {
421 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
422 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
423 bnc
->ifindex
= ifindex
;
425 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
) {
426 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
427 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
428 } else if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
)
429 && !is_default_host_route(&bnc
->prefix
))
430 register_zebra_rnh(bnc
);
432 if (pi
&& pi
->nexthop
!= bnc
) {
433 /* Unlink from existing nexthop cache, if any. This will also
435 * the nexthop cache entry, if appropriate.
437 bgp_unlink_nexthop(pi
);
439 /* updates NHT pi list reference */
440 path_nh_map(pi
, bnc
, true);
442 bpi_ultimate
= bgp_get_imported_bpi_ultimate(pi
);
443 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
444 (bgp_path_info_extra_get(bpi_ultimate
))->igpmetric
=
446 else if (bpi_ultimate
->extra
)
447 bpi_ultimate
->extra
->igpmetric
= 0;
450 * Let's not accidentally save the peer data for a peer
451 * we are going to throw away in a second or so.
452 * When we come back around we'll fix up this
453 * data properly in replace_nexthop_by_peer
455 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_CONFIG_NODE
))
456 bnc
->nht_info
= (void *)peer
; /* NHT peer reference */
460 * We are cheating here. Views have no associated underlying
461 * ability to detect nexthops. So when we have a view
462 * just tell everyone the nexthop is valid
464 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
466 else if (safi
== SAFI_UNICAST
&& pi
&&
467 pi
->sub_type
== BGP_ROUTE_IMPORTED
&& pi
->extra
&&
468 pi
->extra
->num_labels
&& !bnc
->is_evpn_gwip_nexthop
)
469 return bgp_isvalid_nexthop_for_mpls(bnc
, pi
);
471 return (bgp_isvalid_nexthop(bnc
));
474 void bgp_delete_connected_nexthop(afi_t afi
, struct peer
*peer
)
476 struct bgp_nexthop_cache
*bnc
;
478 ifindex_t ifindex
= 0;
484 * In case the below check evaluates true and if
485 * the bnc has not been freed at this point, then
486 * we might have to do something similar to what's
487 * done in bgp_unlink_nexthop_by_peer(). Since
488 * bgp_unlink_nexthop_by_peer() loops through the
489 * nodes of V6 nexthop cache to find the bnc, it is
490 * currently not being called here.
492 if (!sockunion2hostprefix(&peer
->su
, &p
))
495 * Gather the ifindex for if up/down events to be
496 * tagged into this fun
498 if (afi
== AFI_IP6
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
499 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
500 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[family2afi(p
.family
)],
503 if (BGP_DEBUG(nht
, NHT
))
505 "Cannot find connected NHT node for peer %s(%s)",
506 peer
->host
, peer
->bgp
->name_pretty
);
510 if (bnc
->nht_info
!= peer
) {
511 if (BGP_DEBUG(nht
, NHT
))
513 "Connected NHT %p node for peer %s(%s) points to %p",
514 bnc
, peer
->host
, bnc
->bgp
->name_pretty
,
519 bnc
->nht_info
= NULL
;
521 if (LIST_EMPTY(&(bnc
->paths
))) {
522 if (BGP_DEBUG(nht
, NHT
))
524 "Freeing connected NHT node %p for peer %s(%s)",
525 bnc
, peer
->host
, bnc
->bgp
->name_pretty
);
526 unregister_zebra_rnh(bnc
);
531 static void bgp_process_nexthop_update(struct bgp_nexthop_cache
*bnc
,
532 struct zapi_route
*nhr
,
535 struct nexthop
*nexthop
;
536 struct nexthop
*oldnh
;
537 struct nexthop
*nhlist_head
= NULL
;
538 struct nexthop
*nhlist_tail
= NULL
;
540 bool evpn_resolved
= false;
542 bnc
->last_update
= monotime(NULL
);
543 bnc
->change_flags
= 0;
545 /* debug print the input */
546 if (BGP_DEBUG(nht
, NHT
)) {
547 char bnc_buf
[BNC_FLAG_DUMP_SIZE
];
550 "%s(%u): Rcvd NH update %pFX(%u)(%u) - metric %d/%d #nhops %d/%d flags %s",
551 bnc
->bgp
->name_pretty
, bnc
->bgp
->vrf_id
, &nhr
->prefix
,
552 bnc
->ifindex
, bnc
->srte_color
, nhr
->metric
, bnc
->metric
,
553 nhr
->nexthop_num
, bnc
->nexthop_num
,
554 bgp_nexthop_dump_bnc_flags(bnc
, bnc_buf
,
558 if (nhr
->metric
!= bnc
->metric
)
559 bnc
->change_flags
|= BGP_NEXTHOP_METRIC_CHANGED
;
561 if (nhr
->nexthop_num
!= bnc
->nexthop_num
)
562 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
564 if (import_check
&& (nhr
->type
== ZEBRA_ROUTE_BGP
||
565 !prefix_same(&bnc
->prefix
, &nhr
->prefix
))) {
566 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
567 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
568 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_LABELED_VALID
);
569 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_EVPN_INCOMPLETE
);
571 bnc_nexthop_free(bnc
);
574 if (BGP_DEBUG(nht
, NHT
))
576 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
577 __func__
, &bnc
->prefix
, &nhr
->prefix
);
578 } else if (nhr
->nexthop_num
) {
579 struct peer
*peer
= bnc
->nht_info
;
581 /* notify bgp fsm if nbr ip goes from invalid->valid */
582 if (!bnc
->nexthop_num
)
583 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
585 if (!bnc
->is_evpn_gwip_nexthop
)
586 bnc
->flags
|= BGP_NEXTHOP_VALID
;
587 bnc
->metric
= nhr
->metric
;
588 bnc
->nexthop_num
= nhr
->nexthop_num
;
590 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
; /* check below */
592 for (i
= 0; i
< nhr
->nexthop_num
; i
++) {
595 nexthop
= nexthop_from_zapi_nexthop(&nhr
->nexthops
[i
]);
598 * Turn on RA for the v6 nexthops
599 * we receive from bgp. This is to allow us
600 * to work with v4 routing over v6 nexthops
602 if (peer
&& !peer
->ifp
603 && CHECK_FLAG(peer
->flags
,
604 PEER_FLAG_CAPABILITY_ENHE
)
605 && nhr
->prefix
.family
== AF_INET6
606 && nexthop
->type
!= NEXTHOP_TYPE_BLACKHOLE
) {
607 struct interface
*ifp
;
609 ifp
= if_lookup_by_index(nexthop
->ifindex
,
612 zclient_send_interface_radv_req(
613 zclient
, nexthop
->vrf_id
, ifp
,
615 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
617 /* There is at least one label-switched path */
618 if (nexthop
->nh_label
&&
619 nexthop
->nh_label
->num_labels
) {
621 bnc
->flags
|= BGP_NEXTHOP_LABELED_VALID
;
622 num_labels
= nexthop
->nh_label
->num_labels
;
625 if (BGP_DEBUG(nht
, NHT
)) {
626 char buf
[NEXTHOP_STRLEN
];
628 " nhop via %s (%d labels)",
629 nexthop2str(nexthop
, buf
, sizeof(buf
)),
634 nhlist_tail
->next
= nexthop
;
635 nhlist_tail
= nexthop
;
637 nhlist_tail
= nexthop
;
638 nhlist_head
= nexthop
;
641 /* No need to evaluate the nexthop if we have already
643 * that there has been a change.
645 if (bnc
->change_flags
& BGP_NEXTHOP_CHANGED
)
648 for (oldnh
= bnc
->nexthop
; oldnh
; oldnh
= oldnh
->next
)
649 if (nexthop_same(oldnh
, nexthop
))
653 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
655 bnc_nexthop_free(bnc
);
656 bnc
->nexthop
= nhlist_head
;
659 * Gateway IP nexthop is L3 reachable. Mark it as
660 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
662 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
663 * When its mapping with EVPN RT-2 is established, unset
664 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
666 if (bnc
->is_evpn_gwip_nexthop
) {
667 evpn_resolved
= bgp_evpn_is_gateway_ip_resolved(bnc
);
669 if (BGP_DEBUG(nht
, NHT
))
671 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
673 (evpn_resolved
? "successful"
677 bnc
->flags
|= BGP_NEXTHOP_VALID
;
678 bnc
->flags
&= ~BGP_NEXTHOP_EVPN_INCOMPLETE
;
679 bnc
->change_flags
|= BGP_NEXTHOP_MACIP_CHANGED
;
681 bnc
->flags
|= BGP_NEXTHOP_EVPN_INCOMPLETE
;
682 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
686 bnc
->flags
&= ~BGP_NEXTHOP_EVPN_INCOMPLETE
;
687 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
688 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
;
689 bnc
->nexthop_num
= nhr
->nexthop_num
;
691 /* notify bgp fsm if nbr ip goes from valid->invalid */
692 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
694 bnc_nexthop_free(bnc
);
701 static void bgp_nht_ifp_table_handle(struct bgp
*bgp
,
702 struct bgp_nexthop_cache_head
*table
,
703 struct interface
*ifp
, bool up
)
705 struct bgp_nexthop_cache
*bnc
;
707 frr_each (bgp_nexthop_cache
, table
, bnc
) {
708 if (bnc
->ifindex
!= ifp
->ifindex
)
711 bnc
->last_update
= monotime(NULL
);
712 bnc
->change_flags
= 0;
715 * For interface based routes ( ala the v6 LL routes
716 * that this was written for ) the metric received
717 * for the connected route is 0 not 1.
721 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
722 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
723 bnc
->nexthop_num
= 1;
725 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
726 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
727 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
728 bnc
->nexthop_num
= 0;
734 static void bgp_nht_ifp_handle(struct interface
*ifp
, bool up
)
738 bgp
= ifp
->vrf
->info
;
742 bgp_nht_ifp_table_handle(bgp
, &bgp
->nexthop_cache_table
[AFI_IP
], ifp
,
744 bgp_nht_ifp_table_handle(bgp
, &bgp
->import_check_table
[AFI_IP
], ifp
,
746 bgp_nht_ifp_table_handle(bgp
, &bgp
->nexthop_cache_table
[AFI_IP6
], ifp
,
748 bgp_nht_ifp_table_handle(bgp
, &bgp
->import_check_table
[AFI_IP6
], ifp
,
/*
 * Interface-up entry point for nexthop tracking.
 * Forwards ifp to bgp_nht_ifp_handle() with the "up" argument set to true.
 */
752 void bgp_nht_ifp_up(struct interface
*ifp
)
754 bgp_nht_ifp_handle(ifp
, true);
/*
 * Interface-down entry point for nexthop tracking.
 * Forwards ifp to bgp_nht_ifp_handle() with the "up" argument set to false.
 */
757 void bgp_nht_ifp_down(struct interface
*ifp
)
759 bgp_nht_ifp_handle(ifp
, false);
762 static void bgp_nht_ifp_initial(struct event
*thread
)
764 ifindex_t ifindex
= EVENT_VAL(thread
);
765 struct bgp
*bgp
= EVENT_ARG(thread
);
766 struct interface
*ifp
= if_lookup_by_index(ifindex
, bgp
->vrf_id
);
771 if (BGP_DEBUG(nht
, NHT
))
773 "Handle NHT initial update for Intf %s(%d) status %s",
774 ifp
->name
, ifp
->ifindex
, if_is_up(ifp
) ? "up" : "down");
779 bgp_nht_ifp_down(ifp
);
783 * So the bnc code has the ability to handle interface up/down
784 * events to properly handle v6 LL peering.
785 * What is happening here:
786 * The event system for peering expects the nht code to
787 * report on the tracking events after we move to active
788 * So let's give the system a chance to report on that event
789 * in a manner that is expected.
791 void bgp_nht_interface_events(struct peer
*peer
)
793 struct bgp
*bgp
= peer
->bgp
;
794 struct bgp_nexthop_cache_head
*table
;
795 struct bgp_nexthop_cache
*bnc
;
797 ifindex_t ifindex
= 0;
799 if (!IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
802 if (!sockunion2hostprefix(&peer
->su
, &p
))
805 * Gather the ifindex for if up/down events to be
806 * tagged into this fun
808 if (peer
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
809 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
811 table
= &bgp
->nexthop_cache_table
[AFI_IP6
];
812 bnc
= bnc_find(table
, &p
, 0, ifindex
);
817 event_add_event(bm
->master
, bgp_nht_ifp_initial
, bnc
->bgp
,
821 void bgp_parse_nexthop_update(int command
, vrf_id_t vrf_id
)
823 struct bgp_nexthop_cache_head
*tree
= NULL
;
824 struct bgp_nexthop_cache
*bnc_nhc
, *bnc_import
;
827 struct zapi_route nhr
;
830 bgp
= bgp_lookup_by_vrf_id(vrf_id
);
834 "parse nexthop update: instance not found for vrf_id %u",
839 if (!zapi_nexthop_update_decode(zclient
->ibuf
, &match
, &nhr
)) {
840 zlog_err("%s[%s]: Failure to decode nexthop update", __func__
,
845 afi
= family2afi(match
.family
);
846 tree
= &bgp
->nexthop_cache_table
[afi
];
848 bnc_nhc
= bnc_find(tree
, &match
, nhr
.srte_color
, 0);
850 if (BGP_DEBUG(nht
, NHT
))
852 "parse nexthop update %pFX(%u)(%s): bnc info not found for nexthop cache",
853 &nhr
.prefix
, nhr
.srte_color
, bgp
->name_pretty
);
855 bgp_process_nexthop_update(bnc_nhc
, &nhr
, false);
857 tree
= &bgp
->import_check_table
[afi
];
859 bnc_import
= bnc_find(tree
, &match
, nhr
.srte_color
, 0);
861 if (BGP_DEBUG(nht
, NHT
))
863 "parse nexthop update %pFX(%u)(%s): bnc info not found for import check",
864 &nhr
.prefix
, nhr
.srte_color
, bgp
->name_pretty
);
866 bgp_process_nexthop_update(bnc_import
, &nhr
, true);
869 * HACK: if any BGP route is dependent on an SR-policy that doesn't
870 * exist, zebra will never send NH updates relative to that policy. In
871 * that case, whenever we receive an update about a colorless NH, update
872 * the corresponding colorful NHs that share the same endpoint but that
873 * are inactive. This ugly hack should work around the problem at the
874 * cost of a performance penalty. Long term, what should be done is to
875 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
876 * which should provide a better infrastructure to solve this issue in
877 * a more efficient and elegant way.
879 if (nhr
.srte_color
== 0 && bnc_nhc
) {
880 struct bgp_nexthop_cache
*bnc_iter
;
882 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
884 if (!prefix_same(&bnc_nhc
->prefix
, &bnc_iter
->prefix
) ||
885 bnc_iter
->srte_color
== 0 ||
886 CHECK_FLAG(bnc_iter
->flags
, BGP_NEXTHOP_VALID
))
889 bgp_process_nexthop_update(bnc_iter
, &nhr
, false);
895 * Cleanup nexthop registration and status information for BGP nexthops
896 * pertaining to this VRF. This is invoked upon VRF deletion.
898 void bgp_cleanup_nexthops(struct bgp
*bgp
)
900 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
901 struct bgp_nexthop_cache
*bnc
;
903 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
905 /* Clear relevant flags. */
906 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
907 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
908 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
909 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_EVPN_INCOMPLETE
);
915 * make_prefix - make a prefix structure from the path (essentially
918 static int make_prefix(int afi
, struct bgp_path_info
*pi
, struct prefix
*p
)
921 int is_bgp_static
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
922 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
925 struct bgp_dest
*net
= pi
->net
;
926 const struct prefix
*p_orig
= bgp_dest_get_prefix(net
);
929 if (p_orig
->family
== AF_FLOWSPEC
) {
932 return bgp_flowspec_get_first_nh(pi
->peer
->bgp
,
935 memset(p
, 0, sizeof(struct prefix
));
940 p
->u
.prefix4
= p_orig
->u
.prefix4
;
941 p
->prefixlen
= p_orig
->prefixlen
;
943 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
944 ipv4_mapped_ipv6_to_ipv4(
945 &pi
->attr
->mp_nexthop_global
, &ipv4
);
947 p
->prefixlen
= IPV4_MAX_BITLEN
;
949 if (p_orig
->family
== AF_EVPN
)
951 pi
->attr
->mp_nexthop_global_in
;
953 p
->u
.prefix4
= pi
->attr
->nexthop
;
954 p
->prefixlen
= IPV4_MAX_BITLEN
;
959 p
->family
= AF_INET6
;
962 p
->u
.prefix6
= p_orig
->u
.prefix6
;
963 p
->prefixlen
= p_orig
->prefixlen
;
965 /* If we receive MP_REACH nexthop with ::(LL)
966 * or LL(LL), use LL address as nexthop cache.
968 if (pi
->attr
->mp_nexthop_len
969 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
970 && (IN6_IS_ADDR_UNSPECIFIED(
971 &pi
->attr
->mp_nexthop_global
)
972 || IN6_IS_ADDR_LINKLOCAL(
973 &pi
->attr
->mp_nexthop_global
)))
974 p
->u
.prefix6
= pi
->attr
->mp_nexthop_local
;
975 /* If we receive MP_REACH with (GA)::(LL)
976 * then check for route-map to choose GA or LL
978 else if (pi
->attr
->mp_nexthop_len
979 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
) {
980 if (pi
->attr
->mp_nexthop_prefer_global
)
982 pi
->attr
->mp_nexthop_global
;
985 pi
->attr
->mp_nexthop_local
;
987 p
->u
.prefix6
= pi
->attr
->mp_nexthop_global
;
988 p
->prefixlen
= IPV6_MAX_BITLEN
;
992 if (BGP_DEBUG(nht
, NHT
)) {
994 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
995 __func__
, afi
, AFI_IP
, AFI_IP6
);
1003 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1006 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1007 * int command -- command to send to zebra
1011 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache
*bnc
, int command
)
1013 bool exact_match
= false;
1014 bool resolve_via_default
= false;
1020 /* Don't try to register if Zebra doesn't know of this instance. */
1021 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc
->bgp
)) {
1022 if (BGP_DEBUG(zebra
, ZEBRA
))
1024 "%s: No zebra instance to talk to, not installing NHT entry",
1029 if (!bgp_zebra_num_connects()) {
1030 if (BGP_DEBUG(zebra
, ZEBRA
))
1032 "%s: We have not connected yet, cannot send nexthops",
1035 if (command
== ZEBRA_NEXTHOP_REGISTER
) {
1036 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
))
1038 if (CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
1039 resolve_via_default
= true;
1042 if (BGP_DEBUG(zebra
, ZEBRA
))
1043 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__
,
1044 zserv_command_string(command
), &bnc
->prefix
,
1045 bnc
->bgp
->name_pretty
);
1047 ret
= zclient_send_rnh(zclient
, command
, &bnc
->prefix
, SAFI_UNICAST
,
1048 exact_match
, resolve_via_default
,
1050 if (ret
== ZCLIENT_SEND_FAILURE
) {
1051 flog_warn(EC_BGP_ZEBRA_SEND
,
1052 "sendmsg_nexthop: zclient_send_message() failed");
1056 if (command
== ZEBRA_NEXTHOP_REGISTER
)
1057 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1058 else if (command
== ZEBRA_NEXTHOP_UNREGISTER
)
1059 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1064 * register_zebra_rnh - register a NH/route with Zebra for notification
1065 * when the route or the route to the nexthop changes.
1067 * struct bgp_nexthop_cache *bnc
1071 static void register_zebra_rnh(struct bgp_nexthop_cache
*bnc
)
1073 /* Check if we have already registered */
1074 if (bnc
->flags
& BGP_NEXTHOP_REGISTERED
)
1078 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1082 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_REGISTER
);
1086 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1088 * struct bgp_nexthop_cache *bnc
1092 static void unregister_zebra_rnh(struct bgp_nexthop_cache
*bnc
)
1094 /* Check if we have already registered */
1095 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
1099 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1103 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_UNREGISTER
);
1107 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1109 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1113 void evaluate_paths(struct bgp_nexthop_cache
*bnc
)
1115 struct bgp_dest
*dest
;
1116 struct bgp_path_info
*path
;
1117 struct bgp_path_info
*bpi_ultimate
;
1119 struct peer
*peer
= (struct peer
*)bnc
->nht_info
;
1120 struct bgp_table
*table
;
1122 struct bgp
*bgp_path
;
1123 const struct prefix
*p
;
1125 if (BGP_DEBUG(nht
, NHT
)) {
1126 char bnc_buf
[BNC_FLAG_DUMP_SIZE
];
1127 char chg_buf
[BNC_FLAG_DUMP_SIZE
];
1130 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1131 &bnc
->prefix
, bnc
->ifindex
, bnc
->srte_color
,
1132 bnc
->bgp
->name_pretty
,
1133 bgp_nexthop_dump_bnc_flags(bnc
, bnc_buf
,
1135 bgp_nexthop_dump_bnc_change_flags(bnc
, chg_buf
,
1139 LIST_FOREACH (path
, &(bnc
->paths
), nh_thread
) {
1140 if (path
->type
== ZEBRA_ROUTE_BGP
&&
1141 (path
->sub_type
== BGP_ROUTE_NORMAL
||
1142 path
->sub_type
== BGP_ROUTE_STATIC
||
1143 path
->sub_type
== BGP_ROUTE_IMPORTED
))
1144 /* evaluate the path */
1146 else if (path
->sub_type
== BGP_ROUTE_REDISTRIBUTE
) {
1147 /* evaluate the path for redistributed routes
1148 * except those from VNC
1150 if ((path
->type
== ZEBRA_ROUTE_VNC
) ||
1151 (path
->type
== ZEBRA_ROUTE_VNC_DIRECT
))
1154 /* don't evaluate the path */
1158 assert(dest
&& bgp_dest_table(dest
));
1159 p
= bgp_dest_get_prefix(dest
);
1160 afi
= family2afi(p
->family
);
1161 table
= bgp_dest_table(dest
);
1165 * handle routes from other VRFs (they can have a
1166 * nexthop in THIS VRF). bgp_path is the bgp instance
1167 * that owns the route referencing this nexthop.
1169 bgp_path
= table
->bgp
;
1172 * Path becomes valid/invalid depending on whether the nexthop
1173 * reachable/unreachable.
1175 * In case of unicast routes that were imported from vpn
1176 * and that have labels, they are valid only if there are
1177 * nexthops with labels
1179 * If the nexthop is EVPN gateway-IP,
1180 * do not check for a valid label.
1183 bool bnc_is_valid_nexthop
= false;
1184 bool path_valid
= false;
1186 if (safi
== SAFI_UNICAST
&& path
->sub_type
== BGP_ROUTE_IMPORTED
1187 && path
->extra
&& path
->extra
->num_labels
1188 && (path
->attr
->evpn_overlay
.type
1189 != OVERLAY_INDEX_GATEWAY_IP
)) {
1190 bnc_is_valid_nexthop
=
1191 bgp_isvalid_nexthop_for_mpls(bnc
, path
) ? true
1194 if (bgp_update_martian_nexthop(
1195 bnc
->bgp
, afi
, safi
, path
->type
,
1196 path
->sub_type
, path
->attr
, dest
)) {
1197 if (BGP_DEBUG(nht
, NHT
))
1199 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1200 __func__
, dest
, bgp_path
->name
);
1202 bnc_is_valid_nexthop
=
1203 bgp_isvalid_nexthop(bnc
) ? true : false;
1206 if (BGP_DEBUG(nht
, NHT
)) {
1209 char rd_buf
[RD_ADDRSTRLEN
];
1212 (struct prefix_rd
*)bgp_dest_get_prefix(
1214 rd_buf
, sizeof(rd_buf
),
1215 bgp_get_asnotation(bnc
->bgp
));
1217 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
1218 afi
, safi
, dest
, rd_buf
,
1219 bgp_path
->name_pretty
, path
->flags
);
1222 "... eval path %d/%d %pBD %s flags 0x%x",
1223 afi
, safi
, dest
, bgp_path
->name_pretty
,
1227 /* Skip paths marked for removal or as history. */
1228 if (CHECK_FLAG(path
->flags
, BGP_PATH_REMOVED
)
1229 || CHECK_FLAG(path
->flags
, BGP_PATH_HISTORY
))
1232 /* Copy the metric to the path. Will be used for bestpath
1234 bpi_ultimate
= bgp_get_imported_bpi_ultimate(path
);
1235 if (bgp_isvalid_nexthop(bnc
) && bnc
->metric
)
1236 (bgp_path_info_extra_get(bpi_ultimate
))->igpmetric
=
1238 else if (bpi_ultimate
->extra
)
1239 bpi_ultimate
->extra
->igpmetric
= 0;
1241 if (CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_METRIC_CHANGED
)
1242 || CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
)
1243 || path
->attr
->srte_color
!= 0)
1244 SET_FLAG(path
->flags
, BGP_PATH_IGP_CHANGED
);
1246 path_valid
= CHECK_FLAG(path
->flags
, BGP_PATH_VALID
);
1247 if (path
->type
== ZEBRA_ROUTE_BGP
&&
1248 path
->sub_type
== BGP_ROUTE_STATIC
&&
1249 !CHECK_FLAG(bgp_path
->flags
, BGP_FLAG_IMPORT_CHECK
))
1250 /* static routes with 'no bgp network import-check' are
1251 * always valid. if nht is called with static routes,
1252 * the vpn exportation needs to be triggered
1254 vpn_leak_from_vrf_update(bgp_get_default(), bgp_path
,
1256 else if (path
->sub_type
== BGP_ROUTE_REDISTRIBUTE
&&
1257 safi
== SAFI_UNICAST
&&
1258 (bgp_path
->inst_type
== BGP_INSTANCE_TYPE_VRF
||
1259 bgp_path
->inst_type
== BGP_INSTANCE_TYPE_DEFAULT
))
1260 /* redistribute routes are always valid
1261 * if nht is called with redistribute routes, the vpn
1262 * exportation needs to be triggered
1264 vpn_leak_from_vrf_update(bgp_get_default(), bgp_path
,
1266 else if (path_valid
!= bnc_is_valid_nexthop
) {
1268 /* No longer valid, clear flag; also for EVPN
1269 * routes, unimport from VRFs if needed.
1271 bgp_aggregate_decrement(bgp_path
, p
, path
, afi
,
1273 bgp_path_info_unset_flag(dest
, path
,
1275 if (safi
== SAFI_EVPN
&&
1276 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
1277 bgp_evpn_unimport_route(bgp_path
,
1278 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
1279 if (safi
== SAFI_UNICAST
&&
1280 (bgp_path
->inst_type
!=
1281 BGP_INSTANCE_TYPE_VIEW
))
1282 vpn_leak_from_vrf_withdraw(
1283 bgp_get_default(), bgp_path
,
1286 /* Path becomes valid, set flag; also for EVPN
1287 * routes, import from VRFs if needed.
1289 bgp_path_info_set_flag(dest
, path
,
1291 bgp_aggregate_increment(bgp_path
, p
, path
, afi
,
1293 if (safi
== SAFI_EVPN
&&
1294 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
1295 bgp_evpn_import_route(bgp_path
,
1296 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
1297 if (safi
== SAFI_UNICAST
&&
1298 (bgp_path
->inst_type
!=
1299 BGP_INSTANCE_TYPE_VIEW
))
1300 vpn_leak_from_vrf_update(
1301 bgp_get_default(), bgp_path
,
1306 bgp_process(bgp_path
, dest
, afi
, safi
);
1310 int valid_nexthops
= bgp_isvalid_nexthop(bnc
);
1312 if (valid_nexthops
) {
1314 * Peering cannot occur across a blackhole nexthop
1316 if (bnc
->nexthop_num
== 1 && bnc
->nexthop
1317 && bnc
->nexthop
->type
== NEXTHOP_TYPE_BLACKHOLE
) {
1318 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
1321 peer
->last_reset
= PEER_DOWN_WAITING_OPEN
;
1323 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
1325 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
)) {
1326 if (BGP_DEBUG(nht
, NHT
))
1328 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1329 __func__
, peer
->host
,
1330 peer
->bgp
->name_pretty
,
1332 bgp_fsm_nht_update(peer
, !!valid_nexthops
);
1333 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
1337 RESET_FLAG(bnc
->change_flags
);
1341 * path_nh_map - make or break path-to-nexthop association.
1343 * path - pointer to the path structure
1344 * bnc - pointer to the nexthop structure
1345 * make - if set, make the association. if unset, just break the existing
1348 void path_nh_map(struct bgp_path_info
*path
, struct bgp_nexthop_cache
*bnc
,
1351 if (path
->nexthop
) {
1352 LIST_REMOVE(path
, nh_thread
);
1353 path
->nexthop
->path_count
--;
1354 path
->nexthop
= NULL
;
1357 LIST_INSERT_HEAD(&(bnc
->paths
), path
, nh_thread
);
1358 path
->nexthop
= bnc
;
1359 path
->nexthop
->path_count
++;
1364 * This function is called to register nexthops to zebra
1365 * as that we may have tried to install the nexthops
1366 * before we actually have a zebra connection
1368 void bgp_nht_register_nexthops(struct bgp
*bgp
)
1370 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
1371 struct bgp_nexthop_cache
*bnc
;
1373 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
1375 register_zebra_rnh(bnc
);
1380 void bgp_nht_reg_enhe_cap_intfs(struct peer
*peer
)
1383 struct bgp_nexthop_cache
*bnc
;
1384 struct nexthop
*nhop
;
1385 struct interface
*ifp
;
1387 ifindex_t ifindex
= 0;
1393 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
1394 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1395 __func__
, peer
->host
);
1399 if (p
.family
!= AF_INET6
)
1402 * Gather the ifindex for if up/down events to be
1403 * tagged into this fun
1405 if (peer
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
1406 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
1408 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0, ifindex
);
1412 if (peer
!= bnc
->nht_info
)
1415 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
1416 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
1421 zclient_send_interface_radv_req(zclient
,
1424 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
1428 void bgp_nht_dereg_enhe_cap_intfs(struct peer
*peer
)
1431 struct bgp_nexthop_cache
*bnc
;
1432 struct nexthop
*nhop
;
1433 struct interface
*ifp
;
1435 ifindex_t ifindex
= 0;
1442 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
1443 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1444 __func__
, peer
->host
);
1448 if (p
.family
!= AF_INET6
)
1451 * Gather the ifindex for if up/down events to be
1452 * tagged into this fun
1454 if (peer
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
1455 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
1457 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0, ifindex
);
1461 if (peer
!= bnc
->nht_info
)
1464 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
1465 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
1470 zclient_send_interface_radv_req(zclient
, nhop
->vrf_id
, ifp
, 0,
/****************************************************************************
 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
 * left to the application using it.
 * PS: Currently EVPN host routes are the only app using L3 NHG for fast
 * failover of remote ES links.
 ***************************************************************************/
/* Bitmap from which bgp_l3nhg_id_alloc()/bgp_l3nhg_id_free() take and
 * return id indices.
 */
static bitfield_t bgp_nh_id_bitmap;
/* Value obtained from zclient_get_nhg_start(ZEBRA_ROUTE_BGP); it is added
 * to a bitmap index to form the id handed out, and subtracted again on free.
 */
static uint32_t bgp_l3nhg_start;
/* XXX - currently we do nothing on the callbacks */

/* First callback handed to nexthop_group_init(); intentionally a no-op. */
static void bgp_l3nhg_add_cb(const char *name)
{
}
/* Second callback handed to nexthop_group_init(); intentionally a no-op. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
}
/* Third callback handed to nexthop_group_init(); intentionally a no-op. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}
/* Fourth callback handed to nexthop_group_init(); intentionally a no-op. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}
/* No-op callback taking a group name.
 * NOTE(review): presumably the final (delete) callback given to
 * nexthop_group_init() - confirm against the call site.
 */
static void bgp_l3nhg_del_cb(const char *name)
{
}
1508 static void bgp_l3nhg_zebra_init(void)
1510 static bool bgp_l3nhg_zebra_inited
;
1511 if (bgp_l3nhg_zebra_inited
)
1514 bgp_l3nhg_zebra_inited
= true;
1515 bgp_l3nhg_start
= zclient_get_nhg_start(ZEBRA_ROUTE_BGP
);
1516 nexthop_group_init(bgp_l3nhg_add_cb
, bgp_l3nhg_modify_cb
,
1517 bgp_l3nhg_add_nexthop_cb
, bgp_l3nhg_del_nexthop_cb
,
1522 void bgp_l3nhg_init(void)
1526 id_max
= MIN(ZEBRA_NHG_PROTO_SPACING
- 1, 16 * 1024);
1527 bf_init(bgp_nh_id_bitmap
, id_max
);
1528 bf_assign_zero_index(bgp_nh_id_bitmap
);
1530 if (BGP_DEBUG(nht
, NHT
) || BGP_DEBUG(evpn_mh
, EVPN_MH_ES
))
1531 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start
+ 1,
1532 bgp_l3nhg_start
+ id_max
);
/* Release the L3 NHG id bitmap that bgp_l3nhg_init() set up. */
void bgp_l3nhg_finish(void)
{
	bf_free(bgp_nh_id_bitmap);
}
1540 uint32_t bgp_l3nhg_id_alloc(void)
1542 uint32_t nhg_id
= 0;
1544 bgp_l3nhg_zebra_init();
1545 bf_assign_index(bgp_nh_id_bitmap
, nhg_id
);
1547 nhg_id
+= bgp_l3nhg_start
;
1552 void bgp_l3nhg_id_free(uint32_t nhg_id
)
1554 if (!nhg_id
|| (nhg_id
<= bgp_l3nhg_start
))
1557 nhg_id
-= bgp_l3nhg_start
;
1559 bf_release_index(bgp_nh_id_bitmap
, nhg_id
);