1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 #include "nexthop_group.h"
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49 #include "bgpd/bgp_mplsvpn.h"
/* Zebra client handle; only declared here (extern), not defined. */
extern struct zclient *zclient;

/* Prototypes for file-local helpers that are referenced before their
 * definitions appear.
 */
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
static void bgp_nht_ifp_initial(struct thread *thread);
58 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache
*bnc
)
60 return (bgp_zebra_num_connects() == 0
61 || (bnc
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
)
62 && bnc
->nexthop_num
> 0));
65 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache
*bnc
,
66 struct bgp_path_info
*path
)
68 struct interface
*ifp
= NULL
;
69 struct nexthop
*nexthop
;
70 struct bgp_interface
*iifp
;
73 if (!path
->extra
|| !path
->extra
->peer_orig
)
76 peer
= path
->extra
->peer_orig
;
78 /* only connected ebgp peers are valid */
79 if (peer
->sort
!= BGP_PEER_EBGP
|| peer
->ttl
!= BGP_DEFAULT_TTL
||
80 CHECK_FLAG(peer
->flags
, PEER_FLAG_DISABLE_CONNECTED_CHECK
) ||
81 CHECK_FLAG(peer
->bgp
->flags
, BGP_FLAG_DISABLE_NH_CONNECTED_CHK
))
84 for (nexthop
= bnc
->nexthop
; nexthop
; nexthop
= nexthop
->next
) {
85 if (nexthop
->type
== NEXTHOP_TYPE_IFINDEX
||
86 nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
||
87 nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
88 ifp
= if_lookup_by_index(
89 bnc
->ifindex
? bnc
->ifindex
: nexthop
->ifindex
,
95 if (CHECK_FLAG(iifp
->flags
, BGP_INTERFACE_MPLS_BGP_FORWARDING
))
101 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache
*bnc
,
102 struct bgp_path_info
*path
)
104 struct interface
*ifp
= NULL
;
105 struct nexthop
*nexthop
;
107 for (nexthop
= bnc
->nexthop
; nexthop
; nexthop
= nexthop
->next
) {
108 if (nexthop
->type
!= NEXTHOP_TYPE_BLACKHOLE
) {
109 ifp
= if_lookup_by_index(
110 bnc
->ifindex
? bnc
->ifindex
: nexthop
->ifindex
,
112 if (ifp
&& (ifp
->ll_type
== ZEBRA_LLT_IPGRE
||
113 ifp
->ll_type
== ZEBRA_LLT_IP6GRE
))
120 if (CHECK_FLAG(path
->attr
->rmap_change_flags
,
121 BATTR_RMAP_L3VPN_ACCEPT_GRE
))
127 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache
*bnc
,
128 struct bgp_path_info
*path
)
131 * - In the case of MPLS-VPN, the label is learned from LDP or other
132 * protocols, and nexthop tracking is enabled for the label.
133 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
134 * - In the case of SRv6-VPN, we need to track the reachability to the
135 * SID (in other words, IPv6 address). As in MPLS, we need to record
136 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
137 * currently not implemented, and this function assumes that all
138 * Transit routes for SRv6-VPN are valid.
139 * - Otherwise check for mpls-gre acceptance
141 return (bgp_zebra_num_connects() == 0 ||
142 (bnc
&& (bnc
->nexthop_num
> 0 &&
143 (CHECK_FLAG(path
->flags
, BGP_PATH_ACCEPT_OWN
) ||
144 CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_LABELED_VALID
) ||
145 bnc
->bgp
->srv6_enabled
||
146 bgp_isvalid_nexthop_for_ebgp(bnc
, path
) ||
147 bgp_isvalid_nexthop_for_mplsovergre(bnc
, path
)))));
150 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache
*bnc
)
152 if (LIST_EMPTY(&(bnc
->paths
)) && !bnc
->nht_info
) {
153 if (BGP_DEBUG(nht
, NHT
))
154 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__
,
155 &bnc
->prefix
, bnc
->ifindex
, bnc
->srte_color
,
156 bnc
->bgp
->name_pretty
);
157 /* only unregister if this is the last nh for this prefix*/
158 if (!bnc_existing_for_prefix(bnc
))
159 unregister_zebra_rnh(bnc
);
164 void bgp_unlink_nexthop(struct bgp_path_info
*path
)
166 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
171 path_nh_map(path
, NULL
, false);
173 bgp_unlink_nexthop_check(bnc
);
176 void bgp_replace_nexthop_by_peer(struct peer
*from
, struct peer
*to
)
180 struct bgp_nexthop_cache
*bncp
, *bnct
;
182 ifindex_t ifindex
= 0;
184 if (!sockunion2hostprefix(&from
->su
, &pp
))
188 * Gather the ifindex for if up/down events to be
189 * tagged into this fun
191 if (from
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&from
->su
.sin6
.sin6_addr
))
192 ifindex
= from
->su
.sin6
.sin6_scope_id
;
194 afi
= family2afi(pp
.family
);
195 bncp
= bnc_find(&from
->bgp
->nexthop_cache_table
[afi
], &pp
, 0, ifindex
);
197 if (!sockunion2hostprefix(&to
->su
, &pt
))
201 * Gather the ifindex for if up/down events to be
202 * tagged into this fun
205 if (to
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&to
->su
.sin6
.sin6_addr
))
206 ifindex
= to
->su
.sin6
.sin6_scope_id
;
207 bnct
= bnc_find(&to
->bgp
->nexthop_cache_table
[afi
], &pt
, 0, ifindex
);
217 * Returns the bnc whose bnc->nht_info matches the LL peer by
218 * looping through the IPv6 nexthop table
220 static struct bgp_nexthop_cache
*
221 bgp_find_ipv6_nexthop_matching_peer(struct peer
*peer
)
223 struct bgp_nexthop_cache
*bnc
;
225 frr_each (bgp_nexthop_cache
, &peer
->bgp
->nexthop_cache_table
[AFI_IP6
],
227 if (bnc
->nht_info
== peer
) {
228 if (BGP_DEBUG(nht
, NHT
)) {
230 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
231 &bnc
->prefix
, bnc
->ifindex
,
232 bnc
->srte_color
, bnc
, peer
->host
,
233 peer
->bgp
->name_pretty
, peer
);
239 if (BGP_DEBUG(nht
, NHT
))
241 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
242 peer
->host
, peer
->bgp
->name_pretty
, peer
);
247 void bgp_unlink_nexthop_by_peer(struct peer
*peer
)
250 struct bgp_nexthop_cache
*bnc
;
251 afi_t afi
= family2afi(peer
->su
.sa
.sa_family
);
252 ifindex_t ifindex
= 0;
254 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
256 * In scenarios where unnumbered BGP session is brought
257 * down by shutting down the interface before unconfiguring
258 * the BGP neighbor, neighbor information in peer->su.sa
259 * will be cleared when the interface is shutdown. So
260 * during the deletion of unnumbered bgp peer, above check
261 * will return true. Therefore, in this case, BGP needs to
262 * find the bnc whose bnc->nht_info matches the
263 * peer being deleted and free it.
265 bnc
= bgp_find_ipv6_nexthop_matching_peer(peer
);
268 * Gather the ifindex for if up/down events to be
269 * tagged into this fun
271 if (afi
== AFI_IP6
&&
272 IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
273 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
274 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[afi
], &p
, 0,
281 /* cleanup the peer reference */
282 bnc
->nht_info
= NULL
;
284 bgp_unlink_nexthop_check(bnc
);
288 * A route and its nexthop might belong to different VRFs. Therefore,
289 * we need both the bgp_route and bgp_nexthop pointers.
291 int bgp_find_or_add_nexthop(struct bgp
*bgp_route
, struct bgp
*bgp_nexthop
,
292 afi_t afi
, safi_t safi
, struct bgp_path_info
*pi
,
293 struct peer
*peer
, int connected
,
294 const struct prefix
*orig_prefix
)
296 struct bgp_nexthop_cache_head
*tree
= NULL
;
297 struct bgp_nexthop_cache
*bnc
;
298 struct bgp_path_info
*bpi_ultimate
;
300 uint32_t srte_color
= 0;
301 int is_bgp_static_route
= 0;
302 ifindex_t ifindex
= 0;
305 is_bgp_static_route
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
306 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
310 /* Since Extended Next-hop Encoding (RFC5549) support, we want
312 address-family from the next-hop. */
313 if (!is_bgp_static_route
)
314 afi
= BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi
->attr
) ? AFI_IP6
317 /* Validation for the ipv4 mapped ipv6 nexthop. */
318 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
322 /* This will return true if the global IPv6 NH is a link local
324 if (make_prefix(afi
, pi
, &p
) < 0)
327 if (!is_bgp_static_route
&& orig_prefix
328 && prefix_same(&p
, orig_prefix
)) {
329 if (BGP_DEBUG(nht
, NHT
)) {
331 "%s(%pFX): prefix loops through itself",
337 srte_color
= pi
->attr
->srte_color
;
340 * Gather the ifindex for if up/down events to be
341 * tagged into this fun
343 if (afi
== AFI_IP6
&&
344 IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
)) {
345 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
347 if (BGP_DEBUG(nht
, NHT
)) {
349 "%s: Unable to locate ifindex, waiting till we have one",
356 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
357 if (BGP_DEBUG(nht
, NHT
)) {
359 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
360 __func__
, afi
, AFI_IP
, AFI_IP6
);
367 if (is_bgp_static_route
)
368 tree
= &bgp_nexthop
->import_check_table
[afi
];
370 tree
= &bgp_nexthop
->nexthop_cache_table
[afi
];
372 bnc
= bnc_find(tree
, &p
, srte_color
, ifindex
);
374 bnc
= bnc_new(tree
, &p
, srte_color
, ifindex
);
375 bnc
->bgp
= bgp_nexthop
;
376 if (BGP_DEBUG(nht
, NHT
))
377 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
378 &bnc
->prefix
, bnc
->ifindex
, bnc
->srte_color
,
379 bnc
->bgp
->name_pretty
, peer
);
381 if (BGP_DEBUG(nht
, NHT
))
383 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
384 &bnc
->prefix
, bnc
->ifindex
,
385 bnc
->bgp
->name_pretty
, bnc
->flags
, bnc
->ifindex
,
386 bnc
->path_count
, bnc
->nht_info
);
389 if (pi
&& is_route_parent_evpn(pi
))
390 bnc
->is_evpn_gwip_nexthop
= true;
392 if (is_bgp_static_route
&& !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
)) {
393 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
);
395 /* If we're toggling the type, re-register */
396 if ((CHECK_FLAG(bgp_route
->flags
, BGP_FLAG_IMPORT_CHECK
))
397 && !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
)) {
398 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
399 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
400 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
401 } else if ((!CHECK_FLAG(bgp_route
->flags
,
402 BGP_FLAG_IMPORT_CHECK
))
403 && CHECK_FLAG(bnc
->flags
,
404 BGP_STATIC_ROUTE_EXACT_MATCH
)) {
405 UNSET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
406 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
407 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
410 /* When nexthop is already known, but now requires 'connected'
412 * re-register it. The reverse scenario where the nexthop currently
414 * 'connected' resolution does not need a re-register (i.e., we treat
415 * 'connected-required' as an override) except in the scenario where
417 * is actually a case of tracking a peer for connectivity (e.g., after
418 * disable connected-check).
419 * NOTE: We don't track the number of paths separately for 'connected-
420 * required' vs 'connected-not-required' as this change is not a common
423 else if (connected
&& !CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
424 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
425 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
426 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
427 } else if (peer
&& !connected
&&
428 CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
429 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
430 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
431 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
433 if (peer
&& (bnc
->ifindex
!= ifindex
)) {
434 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
435 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
436 bnc
->ifindex
= ifindex
;
438 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
) {
439 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
440 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
441 } else if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
)
442 && !is_default_host_route(&bnc
->prefix
))
443 register_zebra_rnh(bnc
);
445 if (pi
&& pi
->nexthop
!= bnc
) {
446 /* Unlink from existing nexthop cache, if any. This will also
448 * the nexthop cache entry, if appropriate.
450 bgp_unlink_nexthop(pi
);
452 /* updates NHT pi list reference */
453 path_nh_map(pi
, bnc
, true);
455 bpi_ultimate
= bgp_get_imported_bpi_ultimate(pi
);
456 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
457 (bgp_path_info_extra_get(bpi_ultimate
))->igpmetric
=
459 else if (bpi_ultimate
->extra
)
460 bpi_ultimate
->extra
->igpmetric
= 0;
463 * Let's not accidentally save the peer data for a peer
464 * we are going to throw away in a second or so.
465 * When we come back around we'll fix up this
466 * data properly in replace_nexthop_by_peer
468 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_CONFIG_NODE
))
469 bnc
->nht_info
= (void *)peer
; /* NHT peer reference */
473 * We are cheating here. Views have no associated underlying
474 * ability to detect nexthops. So when we have a view
475 * just tell everyone the nexthop is valid
477 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
479 else if (safi
== SAFI_UNICAST
&& pi
&&
480 pi
->sub_type
== BGP_ROUTE_IMPORTED
&& pi
->extra
&&
481 pi
->extra
->num_labels
&& !bnc
->is_evpn_gwip_nexthop
)
482 return bgp_isvalid_nexthop_for_mpls(bnc
, pi
);
484 return (bgp_isvalid_nexthop(bnc
));
487 void bgp_delete_connected_nexthop(afi_t afi
, struct peer
*peer
)
489 struct bgp_nexthop_cache
*bnc
;
491 ifindex_t ifindex
= 0;
497 * In case the below check evaluates true and if
498 * the bnc has not been freed at this point, then
499 * we might have to do something similar to what's
500 * done in bgp_unlink_nexthop_by_peer(). Since
501 * bgp_unlink_nexthop_by_peer() loops through the
502 * nodes of V6 nexthop cache to find the bnc, it is
503 * currently not being called here.
505 if (!sockunion2hostprefix(&peer
->su
, &p
))
508 * Gather the ifindex for if up/down events to be
509 * tagged into this fun
511 if (afi
== AFI_IP6
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
512 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
513 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[family2afi(p
.family
)],
516 if (BGP_DEBUG(nht
, NHT
))
518 "Cannot find connected NHT node for peer %s(%s)",
519 peer
->host
, peer
->bgp
->name_pretty
);
523 if (bnc
->nht_info
!= peer
) {
524 if (BGP_DEBUG(nht
, NHT
))
526 "Connected NHT %p node for peer %s(%s) points to %p",
527 bnc
, peer
->host
, bnc
->bgp
->name_pretty
,
532 bnc
->nht_info
= NULL
;
534 if (LIST_EMPTY(&(bnc
->paths
))) {
535 if (BGP_DEBUG(nht
, NHT
))
537 "Freeing connected NHT node %p for peer %s(%s)",
538 bnc
, peer
->host
, bnc
->bgp
->name_pretty
);
539 unregister_zebra_rnh(bnc
);
544 static void bgp_process_nexthop_update(struct bgp_nexthop_cache
*bnc
,
545 struct zapi_route
*nhr
,
548 struct nexthop
*nexthop
;
549 struct nexthop
*oldnh
;
550 struct nexthop
*nhlist_head
= NULL
;
551 struct nexthop
*nhlist_tail
= NULL
;
553 bool evpn_resolved
= false;
555 bnc
->last_update
= monotime(NULL
);
556 bnc
->change_flags
= 0;
558 /* debug print the input */
559 if (BGP_DEBUG(nht
, NHT
)) {
560 char bnc_buf
[BNC_FLAG_DUMP_SIZE
];
563 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
564 bnc
->bgp
->name_pretty
, bnc
->bgp
->vrf_id
, &nhr
->prefix
,
565 bnc
->ifindex
, bnc
->srte_color
, nhr
->metric
, bnc
->metric
,
566 nhr
->nexthop_num
, bnc
->nexthop_num
,
567 bgp_nexthop_dump_bnc_flags(bnc
, bnc_buf
,
571 if (nhr
->metric
!= bnc
->metric
)
572 bnc
->change_flags
|= BGP_NEXTHOP_METRIC_CHANGED
;
574 if (nhr
->nexthop_num
!= bnc
->nexthop_num
)
575 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
577 if (import_check
&& (nhr
->type
== ZEBRA_ROUTE_BGP
||
578 !prefix_same(&bnc
->prefix
, &nhr
->prefix
))) {
579 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
580 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
581 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_LABELED_VALID
);
582 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_EVPN_INCOMPLETE
);
584 bnc_nexthop_free(bnc
);
587 if (BGP_DEBUG(nht
, NHT
))
589 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
590 __func__
, &bnc
->prefix
, &nhr
->prefix
);
591 } else if (nhr
->nexthop_num
) {
592 struct peer
*peer
= bnc
->nht_info
;
594 /* notify bgp fsm if nbr ip goes from invalid->valid */
595 if (!bnc
->nexthop_num
)
596 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
598 if (!bnc
->is_evpn_gwip_nexthop
)
599 bnc
->flags
|= BGP_NEXTHOP_VALID
;
600 bnc
->metric
= nhr
->metric
;
601 bnc
->nexthop_num
= nhr
->nexthop_num
;
603 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
; /* check below */
605 for (i
= 0; i
< nhr
->nexthop_num
; i
++) {
608 nexthop
= nexthop_from_zapi_nexthop(&nhr
->nexthops
[i
]);
611 * Turn on RA for the v6 nexthops
612 * we receive from bgp. This is to allow us
613 * to work with v4 routing over v6 nexthops
615 if (peer
&& !peer
->ifp
616 && CHECK_FLAG(peer
->flags
,
617 PEER_FLAG_CAPABILITY_ENHE
)
618 && nhr
->prefix
.family
== AF_INET6
619 && nexthop
->type
!= NEXTHOP_TYPE_BLACKHOLE
) {
620 struct interface
*ifp
;
622 ifp
= if_lookup_by_index(nexthop
->ifindex
,
625 zclient_send_interface_radv_req(
626 zclient
, nexthop
->vrf_id
, ifp
,
628 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
630 /* There is at least one label-switched path */
631 if (nexthop
->nh_label
&&
632 nexthop
->nh_label
->num_labels
) {
634 bnc
->flags
|= BGP_NEXTHOP_LABELED_VALID
;
635 num_labels
= nexthop
->nh_label
->num_labels
;
638 if (BGP_DEBUG(nht
, NHT
)) {
639 char buf
[NEXTHOP_STRLEN
];
641 " nhop via %s (%d labels)",
642 nexthop2str(nexthop
, buf
, sizeof(buf
)),
647 nhlist_tail
->next
= nexthop
;
648 nhlist_tail
= nexthop
;
650 nhlist_tail
= nexthop
;
651 nhlist_head
= nexthop
;
654 /* No need to evaluate the nexthop if we have already
656 * that there has been a change.
658 if (bnc
->change_flags
& BGP_NEXTHOP_CHANGED
)
661 for (oldnh
= bnc
->nexthop
; oldnh
; oldnh
= oldnh
->next
)
662 if (nexthop_same(oldnh
, nexthop
))
666 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
668 bnc_nexthop_free(bnc
);
669 bnc
->nexthop
= nhlist_head
;
672 * Gateway IP nexthop is L3 reachable. Mark it as
673 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
675 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
676 * When its mapping with EVPN RT-2 is established, unset
677 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
679 if (bnc
->is_evpn_gwip_nexthop
) {
680 evpn_resolved
= bgp_evpn_is_gateway_ip_resolved(bnc
);
682 if (BGP_DEBUG(nht
, NHT
))
684 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
686 (evpn_resolved
? "successful"
690 bnc
->flags
|= BGP_NEXTHOP_VALID
;
691 bnc
->flags
&= ~BGP_NEXTHOP_EVPN_INCOMPLETE
;
692 bnc
->change_flags
|= BGP_NEXTHOP_MACIP_CHANGED
;
694 bnc
->flags
|= BGP_NEXTHOP_EVPN_INCOMPLETE
;
695 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
699 bnc
->flags
&= ~BGP_NEXTHOP_EVPN_INCOMPLETE
;
700 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
701 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
;
702 bnc
->nexthop_num
= nhr
->nexthop_num
;
704 /* notify bgp fsm if nbr ip goes from valid->invalid */
705 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
707 bnc_nexthop_free(bnc
);
714 static void bgp_nht_ifp_table_handle(struct bgp
*bgp
,
715 struct bgp_nexthop_cache_head
*table
,
716 struct interface
*ifp
, bool up
)
718 struct bgp_nexthop_cache
*bnc
;
720 frr_each (bgp_nexthop_cache
, table
, bnc
) {
721 if (bnc
->ifindex
!= ifp
->ifindex
)
724 bnc
->last_update
= monotime(NULL
);
725 bnc
->change_flags
= 0;
728 * For interface based routes ( ala the v6 LL routes
729 * that this was written for ) the metric received
730 * for the connected route is 0 not 1.
734 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
735 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
736 bnc
->nexthop_num
= 1;
738 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
739 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
740 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
741 bnc
->nexthop_num
= 0;
747 static void bgp_nht_ifp_handle(struct interface
*ifp
, bool up
)
751 bgp
= ifp
->vrf
->info
;
755 bgp_nht_ifp_table_handle(bgp
, &bgp
->nexthop_cache_table
[AFI_IP
], ifp
,
757 bgp_nht_ifp_table_handle(bgp
, &bgp
->import_check_table
[AFI_IP
], ifp
,
759 bgp_nht_ifp_table_handle(bgp
, &bgp
->nexthop_cache_table
[AFI_IP6
], ifp
,
761 bgp_nht_ifp_table_handle(bgp
, &bgp
->import_check_table
[AFI_IP6
], ifp
,
/*
 * Interface-up entry point for nexthop tracking: hands the interface to
 * bgp_nht_ifp_handle() with its "up" argument set to true.
 */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool is_up = true;

	bgp_nht_ifp_handle(ifp, is_up);
}
/*
 * Interface-down entry point for nexthop tracking: hands the interface
 * to bgp_nht_ifp_handle() with its "up" argument set to false.
 */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool is_up = false;

	bgp_nht_ifp_handle(ifp, is_up);
}
775 static void bgp_nht_ifp_initial(struct thread
*thread
)
777 ifindex_t ifindex
= THREAD_VAL(thread
);
778 struct bgp
*bgp
= THREAD_ARG(thread
);
779 struct interface
*ifp
= if_lookup_by_index(ifindex
, bgp
->vrf_id
);
784 if (BGP_DEBUG(nht
, NHT
))
786 "Handle NHT initial update for Intf %s(%d) status %s",
787 ifp
->name
, ifp
->ifindex
, if_is_up(ifp
) ? "up" : "down");
792 bgp_nht_ifp_down(ifp
);
796 * So the bnc code has the ability to handle interface up/down
797 * events to properly handle v6 LL peering.
798 * What is happening here:
799 * The event system for peering expects the nht code to
800 * report on the tracking events after we move to active
801 * So let's give the system a chance to report on that event
802 * in a manner that is expected.
804 void bgp_nht_interface_events(struct peer
*peer
)
806 struct bgp
*bgp
= peer
->bgp
;
807 struct bgp_nexthop_cache_head
*table
;
808 struct bgp_nexthop_cache
*bnc
;
810 ifindex_t ifindex
= 0;
812 if (!IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
815 if (!sockunion2hostprefix(&peer
->su
, &p
))
818 * Gather the ifindex for if up/down events to be
819 * tagged into this fun
821 if (peer
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
822 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
824 table
= &bgp
->nexthop_cache_table
[AFI_IP6
];
825 bnc
= bnc_find(table
, &p
, 0, ifindex
);
830 thread_add_event(bm
->master
, bgp_nht_ifp_initial
, bnc
->bgp
,
834 void bgp_parse_nexthop_update(int command
, vrf_id_t vrf_id
)
836 struct bgp_nexthop_cache_head
*tree
= NULL
;
837 struct bgp_nexthop_cache
*bnc_nhc
, *bnc_import
;
838 struct bgp_path_info
*pi
;
839 struct bgp_dest
*dest
;
842 struct zapi_route nhr
;
846 bgp
= bgp_lookup_by_vrf_id(vrf_id
);
850 "parse nexthop update: instance not found for vrf_id %u",
855 if (!zapi_nexthop_update_decode(zclient
->ibuf
, &match
, &nhr
)) {
856 zlog_err("%s[%s]: Failure to decode nexthop update", __func__
,
861 afi
= family2afi(match
.family
);
862 tree
= &bgp
->nexthop_cache_table
[afi
];
864 bnc_nhc
= bnc_find(tree
, &match
, nhr
.srte_color
, 0);
866 bgp_process_nexthop_update(bnc_nhc
, &nhr
, false);
867 else if (BGP_DEBUG(nht
, NHT
))
869 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
870 &nhr
.prefix
, nhr
.srte_color
, bgp
->name_pretty
);
872 tree
= &bgp
->import_check_table
[afi
];
874 bnc_import
= bnc_find(tree
, &match
, nhr
.srte_color
, 0);
876 bgp_process_nexthop_update(bnc_import
, &nhr
, true);
879 if (bgp
->rib
[afi
][safi
]) {
880 dest
= bgp_afi_node_get(bgp
->rib
[afi
][safi
], afi
, safi
,
883 for (pi
= bgp_dest_get_bgp_path_info(dest
); pi
;
885 if (pi
->peer
== bgp
->peer_self
&&
886 pi
->type
== ZEBRA_ROUTE_BGP
&&
887 pi
->sub_type
== BGP_ROUTE_STATIC
)
888 vpn_leak_from_vrf_update(
889 bgp_get_default(), bgp
, pi
);
891 } else if (BGP_DEBUG(nht
, NHT
))
893 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
894 &nhr
.prefix
, nhr
.srte_color
, bgp
->name_pretty
);
897 * HACK: if any BGP route is dependent on an SR-policy that doesn't
898 * exist, zebra will never send NH updates relative to that policy. In
899 * that case, whenever we receive an update about a colorless NH, update
900 * the corresponding colorful NHs that share the same endpoint but that
901 * are inactive. This ugly hack should work around the problem at the
902 * cost of a performance penalty. Long term, what should be done is to
903 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
904 * which should provide a better infrastructure to solve this issue in
905 * a more efficient and elegant way.
907 if (nhr
.srte_color
== 0 && bnc_nhc
) {
908 struct bgp_nexthop_cache
*bnc_iter
;
910 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
912 if (!prefix_same(&bnc_nhc
->prefix
, &bnc_iter
->prefix
) ||
913 bnc_iter
->srte_color
== 0 ||
914 CHECK_FLAG(bnc_iter
->flags
, BGP_NEXTHOP_VALID
))
917 bgp_process_nexthop_update(bnc_iter
, &nhr
, false);
923 * Cleanup nexthop registration and status information for BGP nexthops
924 * pertaining to this VRF. This is invoked upon VRF deletion.
926 void bgp_cleanup_nexthops(struct bgp
*bgp
)
928 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
929 struct bgp_nexthop_cache
*bnc
;
931 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
933 /* Clear relevant flags. */
934 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
935 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
936 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
937 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_EVPN_INCOMPLETE
);
943 * make_prefix - make a prefix structure from the path (essentially
946 static int make_prefix(int afi
, struct bgp_path_info
*pi
, struct prefix
*p
)
949 int is_bgp_static
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
950 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
953 struct bgp_dest
*net
= pi
->net
;
954 const struct prefix
*p_orig
= bgp_dest_get_prefix(net
);
957 if (p_orig
->family
== AF_FLOWSPEC
) {
960 return bgp_flowspec_get_first_nh(pi
->peer
->bgp
,
963 memset(p
, 0, sizeof(struct prefix
));
968 p
->u
.prefix4
= p_orig
->u
.prefix4
;
969 p
->prefixlen
= p_orig
->prefixlen
;
971 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
972 ipv4_mapped_ipv6_to_ipv4(
973 &pi
->attr
->mp_nexthop_global
, &ipv4
);
975 p
->prefixlen
= IPV4_MAX_BITLEN
;
977 if (p_orig
->family
== AF_EVPN
)
979 pi
->attr
->mp_nexthop_global_in
;
981 p
->u
.prefix4
= pi
->attr
->nexthop
;
982 p
->prefixlen
= IPV4_MAX_BITLEN
;
987 p
->family
= AF_INET6
;
990 p
->u
.prefix6
= p_orig
->u
.prefix6
;
991 p
->prefixlen
= p_orig
->prefixlen
;
993 /* If we receive MP_REACH nexthop with ::(LL)
994 * or LL(LL), use LL address as nexthop cache.
996 if (pi
->attr
->mp_nexthop_len
997 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
998 && (IN6_IS_ADDR_UNSPECIFIED(
999 &pi
->attr
->mp_nexthop_global
)
1000 || IN6_IS_ADDR_LINKLOCAL(
1001 &pi
->attr
->mp_nexthop_global
)))
1002 p
->u
.prefix6
= pi
->attr
->mp_nexthop_local
;
1003 /* If we receive MP_REACH with (GA)::(LL)
1004 * then check for route-map to choose GA or LL
1006 else if (pi
->attr
->mp_nexthop_len
1007 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
) {
1008 if (CHECK_FLAG(pi
->attr
->nh_flag
,
1009 BGP_ATTR_NH_MP_PREFER_GLOBAL
))
1011 pi
->attr
->mp_nexthop_global
;
1014 pi
->attr
->mp_nexthop_local
;
1016 p
->u
.prefix6
= pi
->attr
->mp_nexthop_global
;
1017 p
->prefixlen
= IPV6_MAX_BITLEN
;
1021 if (BGP_DEBUG(nht
, NHT
)) {
1023 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
1024 __func__
, afi
, AFI_IP
, AFI_IP6
);
1032 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1035 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1036 * int command -- command to send to zebra
1040 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache
*bnc
, int command
)
1042 bool exact_match
= false;
1043 bool resolve_via_default
= false;
1049 /* Don't try to register if Zebra doesn't know of this instance. */
1050 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc
->bgp
)) {
1051 if (BGP_DEBUG(zebra
, ZEBRA
))
1053 "%s: No zebra instance to talk to, not installing NHT entry",
1058 if (!bgp_zebra_num_connects()) {
1059 if (BGP_DEBUG(zebra
, ZEBRA
))
1061 "%s: We have not connected yet, cannot send nexthops",
1064 if (command
== ZEBRA_NEXTHOP_REGISTER
) {
1065 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
))
1067 if (CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
1068 resolve_via_default
= true;
1071 if (BGP_DEBUG(zebra
, ZEBRA
))
1072 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__
,
1073 zserv_command_string(command
), &bnc
->prefix
,
1074 bnc
->bgp
->name_pretty
);
1076 ret
= zclient_send_rnh(zclient
, command
, &bnc
->prefix
, SAFI_UNICAST
,
1077 exact_match
, resolve_via_default
,
1079 if (ret
== ZCLIENT_SEND_FAILURE
) {
1080 flog_warn(EC_BGP_ZEBRA_SEND
,
1081 "sendmsg_nexthop: zclient_send_message() failed");
1085 if (command
== ZEBRA_NEXTHOP_REGISTER
)
1086 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1087 else if (command
== ZEBRA_NEXTHOP_UNREGISTER
)
1088 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1093 * register_zebra_rnh - register a NH/route with Zebra for notification
1094 * when the route or the route to the nexthop changes.
1096 * struct bgp_nexthop_cache *bnc
1100 static void register_zebra_rnh(struct bgp_nexthop_cache
*bnc
)
1102 /* Check if we have already registered */
1103 if (bnc
->flags
& BGP_NEXTHOP_REGISTERED
)
1107 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1111 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_REGISTER
);
1115 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1117 * struct bgp_nexthop_cache *bnc
1121 static void unregister_zebra_rnh(struct bgp_nexthop_cache
*bnc
)
1123 /* Check if we have already registered */
1124 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
1128 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
1132 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_UNREGISTER
);
1136 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1138 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1142 void evaluate_paths(struct bgp_nexthop_cache
*bnc
)
1144 struct bgp_dest
*dest
;
1145 struct bgp_path_info
*path
;
1146 struct bgp_path_info
*bpi_ultimate
;
1148 struct peer
*peer
= (struct peer
*)bnc
->nht_info
;
1149 struct bgp_table
*table
;
1151 struct bgp
*bgp_path
;
1152 const struct prefix
*p
;
1154 if (BGP_DEBUG(nht
, NHT
)) {
1155 char bnc_buf
[BNC_FLAG_DUMP_SIZE
];
1156 char chg_buf
[BNC_FLAG_DUMP_SIZE
];
1159 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1160 &bnc
->prefix
, bnc
->ifindex
, bnc
->srte_color
,
1161 bnc
->bgp
->name_pretty
,
1162 bgp_nexthop_dump_bnc_flags(bnc
, bnc_buf
,
1164 bgp_nexthop_dump_bnc_change_flags(bnc
, chg_buf
,
1168 LIST_FOREACH (path
, &(bnc
->paths
), nh_thread
) {
1169 if (!(path
->type
== ZEBRA_ROUTE_BGP
1170 && ((path
->sub_type
== BGP_ROUTE_NORMAL
)
1171 || (path
->sub_type
== BGP_ROUTE_STATIC
)
1172 || (path
->sub_type
== BGP_ROUTE_IMPORTED
))))
1176 assert(dest
&& bgp_dest_table(dest
));
1177 p
= bgp_dest_get_prefix(dest
);
1178 afi
= family2afi(p
->family
);
1179 table
= bgp_dest_table(dest
);
1183 * handle routes from other VRFs (they can have a
1184 * nexthop in THIS VRF). bgp_path is the bgp instance
1185 * that owns the route referencing this nexthop.
1187 bgp_path
= table
->bgp
;
1190 * Path becomes valid/invalid depending on whether the nexthop
1191 * reachable/unreachable.
1193 * In case of unicast routes that were imported from vpn
1194 * and that have labels, they are valid only if there are
1195 * nexthops with labels
1197 * If the nexthop is EVPN gateway-IP,
1198 * do not check for a valid label.
1201 bool bnc_is_valid_nexthop
= false;
1202 bool path_valid
= false;
1204 if (safi
== SAFI_UNICAST
&& path
->sub_type
== BGP_ROUTE_IMPORTED
1205 && path
->extra
&& path
->extra
->num_labels
1206 && (path
->attr
->evpn_overlay
.type
1207 != OVERLAY_INDEX_GATEWAY_IP
)) {
1208 bnc_is_valid_nexthop
=
1209 bgp_isvalid_nexthop_for_mpls(bnc
, path
) ? true
1212 if (bgp_update_martian_nexthop(
1213 bnc
->bgp
, afi
, safi
, path
->type
,
1214 path
->sub_type
, path
->attr
, dest
)) {
1215 if (BGP_DEBUG(nht
, NHT
))
1217 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1218 __func__
, dest
, bgp_path
->name
);
1220 bnc_is_valid_nexthop
=
1221 bgp_isvalid_nexthop(bnc
) ? true : false;
1224 if (BGP_DEBUG(nht
, NHT
)) {
1227 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1229 (struct prefix_rd
*)bgp_dest_get_prefix(
1231 bgp_path
->name_pretty
, path
->flags
);
1234 "... eval path %d/%d %pBD %s flags 0x%x",
1235 afi
, safi
, dest
, bgp_path
->name_pretty
,
1239 /* Skip paths marked for removal or as history. */
1240 if (CHECK_FLAG(path
->flags
, BGP_PATH_REMOVED
)
1241 || CHECK_FLAG(path
->flags
, BGP_PATH_HISTORY
))
1244 /* Copy the metric to the path. Will be used for bestpath
1246 bpi_ultimate
= bgp_get_imported_bpi_ultimate(path
);
1247 if (bgp_isvalid_nexthop(bnc
) && bnc
->metric
)
1248 (bgp_path_info_extra_get(bpi_ultimate
))->igpmetric
=
1250 else if (bpi_ultimate
->extra
)
1251 bpi_ultimate
->extra
->igpmetric
= 0;
1253 if (CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_METRIC_CHANGED
)
1254 || CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
)
1255 || path
->attr
->srte_color
!= 0)
1256 SET_FLAG(path
->flags
, BGP_PATH_IGP_CHANGED
);
1258 path_valid
= CHECK_FLAG(path
->flags
, BGP_PATH_VALID
);
1259 if (path_valid
!= bnc_is_valid_nexthop
) {
1261 /* No longer valid, clear flag; also for EVPN
1262 * routes, unimport from VRFs if needed.
1264 bgp_aggregate_decrement(bgp_path
, p
, path
, afi
,
1266 bgp_path_info_unset_flag(dest
, path
,
1268 if (safi
== SAFI_EVPN
&&
1269 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
1270 bgp_evpn_unimport_route(bgp_path
,
1271 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
1273 /* Path becomes valid, set flag; also for EVPN
1274 * routes, import from VRFs if needed.
1276 bgp_path_info_set_flag(dest
, path
,
1278 bgp_aggregate_increment(bgp_path
, p
, path
, afi
,
1280 if (safi
== SAFI_EVPN
&&
1281 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
1282 bgp_evpn_import_route(bgp_path
,
1283 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
1287 bgp_process(bgp_path
, dest
, afi
, safi
);
1291 int valid_nexthops
= bgp_isvalid_nexthop(bnc
);
1293 if (valid_nexthops
) {
1295 * Peering cannot occur across a blackhole nexthop
1297 if (bnc
->nexthop_num
== 1 && bnc
->nexthop
1298 && bnc
->nexthop
->type
== NEXTHOP_TYPE_BLACKHOLE
) {
1299 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
1302 peer
->last_reset
= PEER_DOWN_WAITING_OPEN
;
1304 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
1306 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
)) {
1307 if (BGP_DEBUG(nht
, NHT
))
1309 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1310 __func__
, peer
->host
,
1311 peer
->bgp
->name_pretty
,
1313 bgp_fsm_nht_update(peer
, !!valid_nexthops
);
1314 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
1318 RESET_FLAG(bnc
->change_flags
);
1322 * path_nh_map - make or break path-to-nexthop association.
1324 * path - pointer to the path structure
1325 * bnc - pointer to the nexthop structure
1326 * make - if set, make the association. if unset, just break the existing
1329 void path_nh_map(struct bgp_path_info
*path
, struct bgp_nexthop_cache
*bnc
,
1332 if (path
->nexthop
) {
1333 LIST_REMOVE(path
, nh_thread
);
1334 path
->nexthop
->path_count
--;
1335 path
->nexthop
= NULL
;
1338 LIST_INSERT_HEAD(&(bnc
->paths
), path
, nh_thread
);
1339 path
->nexthop
= bnc
;
1340 path
->nexthop
->path_count
++;
1345 * This function is called to register nexthops to zebra
1346 * as that we may have tried to install the nexthops
1347 * before we actually have a zebra connection
1349 void bgp_nht_register_nexthops(struct bgp
*bgp
)
1351 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
1352 struct bgp_nexthop_cache
*bnc
;
1354 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
1356 register_zebra_rnh(bnc
);
1361 void bgp_nht_reg_enhe_cap_intfs(struct peer
*peer
)
1364 struct bgp_nexthop_cache
*bnc
;
1365 struct nexthop
*nhop
;
1366 struct interface
*ifp
;
1368 ifindex_t ifindex
= 0;
1374 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
1375 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1376 __func__
, peer
->host
);
1380 if (p
.family
!= AF_INET6
)
1383 * Gather the ifindex for if up/down events to be
1384 * tagged into this fun
1386 if (peer
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
1387 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
1389 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0, ifindex
);
1393 if (peer
!= bnc
->nht_info
)
1396 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
1397 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
1402 zclient_send_interface_radv_req(zclient
,
1405 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
1409 void bgp_nht_dereg_enhe_cap_intfs(struct peer
*peer
)
1412 struct bgp_nexthop_cache
*bnc
;
1413 struct nexthop
*nhop
;
1414 struct interface
*ifp
;
1416 ifindex_t ifindex
= 0;
1423 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
1424 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1425 __func__
, peer
->host
);
1429 if (p
.family
!= AF_INET6
)
1432 * Gather the ifindex for if up/down events to be
1433 * tagged into this fun
1435 if (peer
->conf_if
&& IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
1436 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
1438 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0, ifindex
);
1442 if (peer
!= bnc
->nht_info
)
1445 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
1446 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
1451 zclient_send_interface_radv_req(zclient
, nhop
->vrf_id
, ifp
, 0,
1456 /****************************************************************************
1457 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1458 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1459 * left to the application using it.
1460 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1461 * failover of remote ES links.
1462 ***************************************************************************/
/* Bitmap from which L3 NHG id indices are assigned and released. */
static bitfield_t bgp_nh_id_bitmap;
/* Base added to a bitmap index to form an NHG id; set from
 * zclient_get_nhg_start(ZEBRA_ROUTE_BGP).
 */
static uint32_t bgp_l3nhg_start;
/* XXX - currently we do nothing on the callbacks */

/* Nexthop-group "add" callback registered with nexthop_group_init();
 * intentionally a no-op.
 */
static void bgp_l3nhg_add_cb(const char *name)
{
}
/* Nexthop-group "modify" callback registered with nexthop_group_init();
 * intentionally a no-op.
 */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
}
/* Nexthop-group "nexthop added" callback registered with
 * nexthop_group_init(); intentionally a no-op.
 */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}
/* Nexthop-group "nexthop deleted" callback registered with
 * nexthop_group_init(); intentionally a no-op.
 */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}
/* Nexthop-group "delete" callback registered with nexthop_group_init();
 * intentionally a no-op.
 */
static void bgp_l3nhg_del_cb(const char *name)
{
}
1489 static void bgp_l3nhg_zebra_init(void)
1491 static bool bgp_l3nhg_zebra_inited
;
1492 if (bgp_l3nhg_zebra_inited
)
1495 bgp_l3nhg_zebra_inited
= true;
1496 bgp_l3nhg_start
= zclient_get_nhg_start(ZEBRA_ROUTE_BGP
);
1497 nexthop_group_init(bgp_l3nhg_add_cb
, bgp_l3nhg_modify_cb
,
1498 bgp_l3nhg_add_nexthop_cb
, bgp_l3nhg_del_nexthop_cb
,
1503 void bgp_l3nhg_init(void)
1507 id_max
= MIN(ZEBRA_NHG_PROTO_SPACING
- 1, 16 * 1024);
1508 bf_init(bgp_nh_id_bitmap
, id_max
);
1509 bf_assign_zero_index(bgp_nh_id_bitmap
);
1511 if (BGP_DEBUG(nht
, NHT
) || BGP_DEBUG(evpn_mh
, EVPN_MH_ES
))
1512 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start
+ 1,
1513 bgp_l3nhg_start
+ id_max
);
1516 void bgp_l3nhg_finish(void)
1518 bf_free(bgp_nh_id_bitmap
);
1521 uint32_t bgp_l3nhg_id_alloc(void)
1523 uint32_t nhg_id
= 0;
1525 bgp_l3nhg_zebra_init();
1526 bf_assign_index(bgp_nh_id_bitmap
, nhg_id
);
1528 nhg_id
+= bgp_l3nhg_start
;
1533 void bgp_l3nhg_id_free(uint32_t nhg_id
)
1535 if (!nhg_id
|| (nhg_id
<= bgp_l3nhg_start
))
1538 nhg_id
-= bgp_l3nhg_start
;
1540 bf_release_index(bgp_nh_id_bitmap
, nhg_id
);