1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 #include "nexthop_group.h"
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
50 extern struct zclient
*zclient
;
/*
 * Forward declarations for file-local helpers that are referenced before
 * their definitions later in this file.  Parameter names mirror the ones
 * used by the definitions so the prototype and the body read the same.
 */

/* Register a nexthop cache entry (or import-check route) with zebra. */
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
			       int is_bgp_import_route);

/* Withdraw a previous registration of a nexthop cache entry from zebra. */
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
				 int is_bgp_import_route);

/*
 * Build the prefix to track from a path; callers treat a negative
 * return value as failure.
 */
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);

/* Event handler handed to thread_add_event() for the initial ifp report. */
static int bgp_nht_ifp_initial(struct thread *thread);
59 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache
*bnc
)
61 return (bgp_zebra_num_connects() == 0
62 || (bnc
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
)
63 && bnc
->nexthop_num
> 0));
66 static int bgp_isvalid_labeled_nexthop(struct bgp_nexthop_cache
*bnc
)
69 * In the case of MPLS-VPN, the label is learned from LDP or other
70 * protocols, and nexthop tracking is enabled for the label.
71 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
72 * In the case of SRv6-VPN, we need to track the reachability to the
73 * SID (in other words, IPv6 address). As in MPLS, we need to record
74 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
75 * currently not implemented, and this function assumes that all
76 * Transit routes for SRv6-VPN are valid.
78 return (bgp_zebra_num_connects() == 0
79 || (bnc
&& bnc
->nexthop_num
> 0
80 && (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_LABELED_VALID
)
81 || bnc
->bgp
->srv6_enabled
)));
84 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache
*bnc
)
86 if (LIST_EMPTY(&(bnc
->paths
)) && !bnc
->nht_info
) {
87 if (BGP_DEBUG(nht
, NHT
)) {
88 char buf
[PREFIX2STR_BUFFER
];
89 zlog_debug("%s: freeing bnc %s(%u)(%s)", __func__
,
90 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
),
91 bnc
->srte_color
, bnc
->bgp
->name_pretty
);
93 /* only unregister if this is the last nh for this prefix*/
94 if (!bnc_existing_for_prefix(bnc
))
96 bnc
, CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
));
101 void bgp_unlink_nexthop(struct bgp_path_info
*path
)
103 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
108 path_nh_map(path
, NULL
, false);
110 bgp_unlink_nexthop_check(bnc
);
113 void bgp_replace_nexthop_by_peer(struct peer
*from
, struct peer
*to
)
117 struct bgp_nexthop_cache
*bncp
, *bnct
;
120 if (!sockunion2hostprefix(&from
->su
, &pp
))
123 afi
= family2afi(pp
.family
);
124 bncp
= bnc_find(&from
->bgp
->nexthop_cache_table
[afi
], &pp
, 0);
126 if (!sockunion2hostprefix(&to
->su
, &pt
))
129 bnct
= bnc_find(&to
->bgp
->nexthop_cache_table
[afi
], &pt
, 0);
138 void bgp_unlink_nexthop_by_peer(struct peer
*peer
)
141 struct bgp_nexthop_cache
*bnc
;
142 afi_t afi
= family2afi(peer
->su
.sa
.sa_family
);
144 if (!sockunion2hostprefix(&peer
->su
, &p
))
147 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[afi
], &p
, 0);
151 /* cleanup the peer reference */
152 bnc
->nht_info
= NULL
;
154 bgp_unlink_nexthop_check(bnc
);
158 * A route and its nexthop might belong to different VRFs. Therefore,
159 * we need both the bgp_route and bgp_nexthop pointers.
161 int bgp_find_or_add_nexthop(struct bgp
*bgp_route
, struct bgp
*bgp_nexthop
,
162 afi_t afi
, safi_t safi
, struct bgp_path_info
*pi
,
163 struct peer
*peer
, int connected
)
165 struct bgp_nexthop_cache_head
*tree
= NULL
;
166 struct bgp_nexthop_cache
*bnc
;
168 uint32_t srte_color
= 0;
169 int is_bgp_static_route
= 0;
170 ifindex_t ifindex
= 0;
173 is_bgp_static_route
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
174 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
178 /* Since Extended Next-hop Encoding (RFC5549) support, we want
180 address-family from the next-hop. */
181 if (!is_bgp_static_route
)
182 afi
= BGP_ATTR_NEXTHOP_AFI_IP6(pi
->attr
) ? AFI_IP6
185 /* Validation for the ipv4 mapped ipv6 nexthop. */
186 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
190 /* This will return true if the global IPv6 NH is a link local
192 if (make_prefix(afi
, pi
, &p
) < 0)
195 srte_color
= pi
->attr
->srte_color
;
198 * Gather the ifindex for if up/down events to be
199 * tagged into this fun
202 && IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
203 ifindex
= peer
->su
.sin6
.sin6_scope_id
;
205 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
206 if (BGP_DEBUG(nht
, NHT
)) {
208 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
209 __func__
, afi
, AFI_IP
, AFI_IP6
);
216 if (is_bgp_static_route
)
217 tree
= &bgp_nexthop
->import_check_table
[afi
];
219 tree
= &bgp_nexthop
->nexthop_cache_table
[afi
];
221 bnc
= bnc_find(tree
, &p
, srte_color
);
223 bnc
= bnc_new(tree
, &p
, srte_color
);
224 bnc
->bgp
= bgp_nexthop
;
225 bnc
->ifindex
= ifindex
;
226 if (BGP_DEBUG(nht
, NHT
)) {
227 char buf
[PREFIX2STR_BUFFER
];
229 zlog_debug("Allocated bnc %s(%u)(%s) peer %p",
230 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
),
231 bnc
->srte_color
, bnc
->bgp
->name_pretty
,
235 if (BGP_DEBUG(nht
, NHT
)) {
236 char buf
[PREFIX2STR_BUFFER
];
239 "Found existing bnc %s(%s) flags 0x%x ifindex %d #paths %d peer %p",
240 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
),
241 bnc
->bgp
->name_pretty
, bnc
->flags
, bnc
->ifindex
,
242 bnc
->path_count
, bnc
->nht_info
);
246 if (pi
&& is_route_parent_evpn(pi
))
247 bnc
->is_evpn_gwip_nexthop
= true;
249 if (is_bgp_static_route
) {
250 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
);
252 /* If we're toggling the type, re-register */
253 if ((CHECK_FLAG(bgp_route
->flags
, BGP_FLAG_IMPORT_CHECK
))
254 && !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
)) {
255 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
256 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
257 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
258 } else if ((!CHECK_FLAG(bgp_route
->flags
,
259 BGP_FLAG_IMPORT_CHECK
))
260 && CHECK_FLAG(bnc
->flags
,
261 BGP_STATIC_ROUTE_EXACT_MATCH
)) {
262 UNSET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
263 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
264 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
267 /* When nexthop is already known, but now requires 'connected'
269 * re-register it. The reverse scenario where the nexthop currently
271 * 'connected' resolution does not need a re-register (i.e., we treat
272 * 'connected-required' as an override) except in the scenario where
274 * is actually a case of tracking a peer for connectivity (e.g., after
275 * disable connected-check).
276 * NOTE: We don't track the number of paths separately for 'connected-
277 * required' vs 'connected-not-required' as this change is not a common
280 else if (connected
&& !CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
281 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
282 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
283 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
284 } else if (peer
&& !connected
285 && CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)) {
286 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
287 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
288 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
290 if (peer
&& (bnc
->ifindex
!= ifindex
)) {
291 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
292 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
293 bnc
->ifindex
= ifindex
;
295 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
) {
296 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
297 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
298 } else if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
)
299 && !is_default_host_route(&bnc
->prefix
))
300 register_zebra_rnh(bnc
, is_bgp_static_route
);
302 if (pi
&& pi
->nexthop
!= bnc
) {
303 /* Unlink from existing nexthop cache, if any. This will also
305 * the nexthop cache entry, if appropriate.
307 bgp_unlink_nexthop(pi
);
309 /* updates NHT pi list reference */
310 path_nh_map(pi
, bnc
, true);
312 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
313 (bgp_path_info_extra_get(pi
))->igpmetric
= bnc
->metric
;
315 pi
->extra
->igpmetric
= 0;
318 * Let's not accidentally save the peer data for a peer
319 * we are going to throw away in a second or so.
320 * When we come back around we'll fix up this
321 * data properly in replace_nexthop_by_peer
323 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_CONFIG_NODE
))
324 bnc
->nht_info
= (void *)peer
; /* NHT peer reference */
328 * We are cheating here. Views have no associated underlying
329 * ability to detect nexthops. So when we have a view
330 * just tell everyone the nexthop is valid
332 if (bgp_route
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
334 else if (safi
== SAFI_UNICAST
&& pi
335 && pi
->sub_type
== BGP_ROUTE_IMPORTED
&& pi
->extra
336 && pi
->extra
->num_labels
&& !bnc
->is_evpn_gwip_nexthop
) {
337 return bgp_isvalid_labeled_nexthop(bnc
);
339 return (bgp_isvalid_nexthop(bnc
));
342 void bgp_delete_connected_nexthop(afi_t afi
, struct peer
*peer
)
344 struct bgp_nexthop_cache
*bnc
;
350 if (!sockunion2hostprefix(&peer
->su
, &p
))
353 bnc
= bnc_find(&peer
->bgp
->nexthop_cache_table
[family2afi(p
.family
)],
356 if (BGP_DEBUG(nht
, NHT
))
358 "Cannot find connected NHT node for peer %s(%s)",
359 peer
->host
, peer
->bgp
->name_pretty
);
363 if (bnc
->nht_info
!= peer
) {
364 if (BGP_DEBUG(nht
, NHT
))
366 "Connected NHT %p node for peer %s(%s) points to %p",
367 bnc
, peer
->host
, bnc
->bgp
->name_pretty
,
372 bnc
->nht_info
= NULL
;
374 if (LIST_EMPTY(&(bnc
->paths
))) {
375 if (BGP_DEBUG(nht
, NHT
))
377 "Freeing connected NHT node %p for peer %s(%s)",
378 bnc
, peer
->host
, bnc
->bgp
->name_pretty
);
379 unregister_zebra_rnh(bnc
, 0);
384 static void bgp_process_nexthop_update(struct bgp_nexthop_cache
*bnc
,
385 struct zapi_route
*nhr
)
387 struct nexthop
*nexthop
;
388 struct nexthop
*oldnh
;
389 struct nexthop
*nhlist_head
= NULL
;
390 struct nexthop
*nhlist_tail
= NULL
;
392 bool evpn_resolved
= false;
394 bnc
->last_update
= bgp_clock();
395 bnc
->change_flags
= 0;
397 /* debug print the input */
398 if (BGP_DEBUG(nht
, NHT
)) {
399 char bnc_buf
[BNC_FLAG_DUMP_SIZE
];
402 "%s(%u): Rcvd NH update %pFX(%u) - metric %d/%d #nhops %d/%d flags %s",
403 bnc
->bgp
->name_pretty
, bnc
->bgp
->vrf_id
, &nhr
->prefix
,
404 bnc
->srte_color
, nhr
->metric
, bnc
->metric
,
405 nhr
->nexthop_num
, bnc
->nexthop_num
,
406 bgp_nexthop_dump_bnc_flags(bnc
, bnc_buf
,
410 if (nhr
->metric
!= bnc
->metric
)
411 bnc
->change_flags
|= BGP_NEXTHOP_METRIC_CHANGED
;
413 if (nhr
->nexthop_num
!= bnc
->nexthop_num
)
414 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
416 if (nhr
->nexthop_num
) {
417 struct peer
*peer
= bnc
->nht_info
;
419 /* notify bgp fsm if nbr ip goes from invalid->valid */
420 if (!bnc
->nexthop_num
)
421 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
423 if (!bnc
->is_evpn_gwip_nexthop
)
424 bnc
->flags
|= BGP_NEXTHOP_VALID
;
425 bnc
->metric
= nhr
->metric
;
426 bnc
->nexthop_num
= nhr
->nexthop_num
;
428 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
; /* check below */
430 for (i
= 0; i
< nhr
->nexthop_num
; i
++) {
433 nexthop
= nexthop_from_zapi_nexthop(&nhr
->nexthops
[i
]);
436 * Turn on RA for the v6 nexthops
437 * we receive from bgp. This is to allow us
438 * to work with v4 routing over v6 nexthops
440 if (peer
&& !peer
->ifp
441 && CHECK_FLAG(peer
->flags
,
442 PEER_FLAG_CAPABILITY_ENHE
)
443 && nhr
->prefix
.family
== AF_INET6
444 && nexthop
->type
!= NEXTHOP_TYPE_BLACKHOLE
) {
445 struct interface
*ifp
;
447 ifp
= if_lookup_by_index(nexthop
->ifindex
,
450 zclient_send_interface_radv_req(
451 zclient
, nexthop
->vrf_id
, ifp
,
453 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
455 /* There is at least one label-switched path */
456 if (nexthop
->nh_label
&&
457 nexthop
->nh_label
->num_labels
) {
459 bnc
->flags
|= BGP_NEXTHOP_LABELED_VALID
;
460 num_labels
= nexthop
->nh_label
->num_labels
;
463 if (BGP_DEBUG(nht
, NHT
)) {
464 char buf
[NEXTHOP_STRLEN
];
466 " nhop via %s (%d labels)",
467 nexthop2str(nexthop
, buf
, sizeof(buf
)),
472 nhlist_tail
->next
= nexthop
;
473 nhlist_tail
= nexthop
;
475 nhlist_tail
= nexthop
;
476 nhlist_head
= nexthop
;
479 /* No need to evaluate the nexthop if we have already
481 * that there has been a change.
483 if (bnc
->change_flags
& BGP_NEXTHOP_CHANGED
)
486 for (oldnh
= bnc
->nexthop
; oldnh
; oldnh
= oldnh
->next
)
487 if (nexthop_same(oldnh
, nexthop
))
491 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
493 bnc_nexthop_free(bnc
);
494 bnc
->nexthop
= nhlist_head
;
497 * Gateway IP nexthop is L3 reachable. Mark it as
498 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
500 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
501 * When its mapping with EVPN RT-2 is established, unset
502 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
504 if (bnc
->is_evpn_gwip_nexthop
) {
505 evpn_resolved
= bgp_evpn_is_gateway_ip_resolved(bnc
);
507 if (BGP_DEBUG(nht
, NHT
)) {
508 char buf2
[PREFIX2STR_BUFFER
];
510 prefix2str(&bnc
->prefix
, buf2
, sizeof(buf2
));
512 "EVPN gateway IP %s recursive MAC/IP lookup %s",
514 (evpn_resolved
? "successful"
519 bnc
->flags
|= BGP_NEXTHOP_VALID
;
520 bnc
->flags
&= ~BGP_NEXTHOP_EVPN_INCOMPLETE
;
521 bnc
->change_flags
|= BGP_NEXTHOP_MACIP_CHANGED
;
523 bnc
->flags
|= BGP_NEXTHOP_EVPN_INCOMPLETE
;
524 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
528 bnc
->flags
&= ~BGP_NEXTHOP_EVPN_INCOMPLETE
;
529 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
530 bnc
->flags
&= ~BGP_NEXTHOP_LABELED_VALID
;
531 bnc
->nexthop_num
= nhr
->nexthop_num
;
533 /* notify bgp fsm if nbr ip goes from valid->invalid */
534 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
536 bnc_nexthop_free(bnc
);
543 static void bgp_nht_ifp_table_handle(struct bgp
*bgp
,
544 struct bgp_nexthop_cache_head
*table
,
545 struct interface
*ifp
, bool up
)
547 struct bgp_nexthop_cache
*bnc
;
549 frr_each (bgp_nexthop_cache
, table
, bnc
) {
550 if (bnc
->ifindex
!= ifp
->ifindex
)
553 bnc
->last_update
= bgp_clock();
554 bnc
->change_flags
= 0;
557 * For interface based routes ( ala the v6 LL routes
558 * that this was written for ) the metric received
559 * for the connected route is 0 not 1.
563 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
564 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
565 bnc
->nexthop_num
= 1;
567 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
568 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
569 SET_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
);
570 bnc
->nexthop_num
= 0;
576 static void bgp_nht_ifp_handle(struct interface
*ifp
, bool up
)
580 bgp
= bgp_lookup_by_vrf_id(ifp
->vrf_id
);
584 bgp_nht_ifp_table_handle(bgp
, &bgp
->nexthop_cache_table
[AFI_IP6
], ifp
,
586 bgp_nht_ifp_table_handle(bgp
, &bgp
->import_check_table
[AFI_IP6
], ifp
,
/*
 * Interface-up entry point: forwards the interface to the common
 * up/down handler with the "up" indication set.
 */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool is_up = true;

	bgp_nht_ifp_handle(ifp, is_up);
}
/*
 * Interface-down entry point: forwards the interface to the common
 * up/down handler with the "up" indication cleared.
 */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool is_up = false;

	bgp_nht_ifp_handle(ifp, is_up);
}
600 static int bgp_nht_ifp_initial(struct thread
*thread
)
602 ifindex_t ifindex
= THREAD_VAL(thread
);
603 struct interface
*ifp
= if_lookup_by_index_all_vrf(ifindex
);
608 if (BGP_DEBUG(nht
, NHT
))
610 "Handle NHT initial update for Intf %s(%d) status %s",
611 ifp
->name
, ifp
->ifindex
, if_is_up(ifp
) ? "up" : "down");
616 bgp_nht_ifp_down(ifp
);
622 * So the bnc code has the ability to handle interface up/down
623 * events to properly handle v6 LL peering.
624 * What is happening here:
625 * The event system for peering expects the nht code to
626 * report on the tracking events after we move to active
627 * So let's give the system a chance to report on that event
628 * in a manner that is expected.
630 void bgp_nht_interface_events(struct peer
*peer
)
632 struct bgp
*bgp
= peer
->bgp
;
633 struct bgp_nexthop_cache_head
*table
;
634 struct bgp_nexthop_cache
*bnc
;
637 if (!IN6_IS_ADDR_LINKLOCAL(&peer
->su
.sin6
.sin6_addr
))
640 if (!sockunion2hostprefix(&peer
->su
, &p
))
643 table
= &bgp
->nexthop_cache_table
[AFI_IP6
];
644 bnc
= bnc_find(table
, &p
, 0);
649 thread_add_event(bm
->master
, bgp_nht_ifp_initial
, NULL
,
653 void bgp_parse_nexthop_update(int command
, vrf_id_t vrf_id
)
655 struct bgp_nexthop_cache_head
*tree
= NULL
;
656 struct bgp_nexthop_cache
*bnc
;
658 struct zapi_route nhr
;
661 bgp
= bgp_lookup_by_vrf_id(vrf_id
);
665 "parse nexthop update: instance not found for vrf_id %u",
670 if (!zapi_nexthop_update_decode(zclient
->ibuf
, &nhr
)) {
671 zlog_err("%s[%s]: Failure to decode nexthop update", __func__
,
676 afi
= family2afi(nhr
.prefix
.family
);
677 if (command
== ZEBRA_NEXTHOP_UPDATE
)
678 tree
= &bgp
->nexthop_cache_table
[afi
];
679 else if (command
== ZEBRA_IMPORT_CHECK_UPDATE
)
680 tree
= &bgp
->import_check_table
[afi
];
682 bnc
= bnc_find(tree
, &nhr
.prefix
, nhr
.srte_color
);
684 if (BGP_DEBUG(nht
, NHT
))
686 "parse nexthop update(%pFX(%u)(%s)): bnc info not found",
687 &nhr
.prefix
, nhr
.srte_color
, bgp
->name_pretty
);
691 bgp_process_nexthop_update(bnc
, &nhr
);
694 * HACK: if any BGP route is dependent on an SR-policy that doesn't
695 * exist, zebra will never send NH updates relative to that policy. In
696 * that case, whenever we receive an update about a colorless NH, update
697 * the corresponding colorful NHs that share the same endpoint but that
698 * are inactive. This ugly hack should work around the problem at the
699 * cost of a performance penalty. Long term, what should be done is to
700 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
701 * which should provide a better infrastructure to solve this issue in
702 * a more efficient and elegant way.
704 if (nhr
.srte_color
== 0) {
705 struct bgp_nexthop_cache
*bnc_iter
;
707 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
709 if (!prefix_same(&bnc
->prefix
, &bnc_iter
->prefix
)
710 || bnc_iter
->srte_color
== 0
711 || CHECK_FLAG(bnc_iter
->flags
, BGP_NEXTHOP_VALID
))
714 bgp_process_nexthop_update(bnc_iter
, &nhr
);
720 * Cleanup nexthop registration and status information for BGP nexthops
721 * pertaining to this VRF. This is invoked upon VRF deletion.
723 void bgp_cleanup_nexthops(struct bgp
*bgp
)
725 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
726 struct bgp_nexthop_cache
*bnc
;
728 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
730 /* Clear relevant flags. */
731 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
732 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
733 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
734 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_EVPN_INCOMPLETE
);
740 * make_prefix - make a prefix structure from the path (essentially
743 static int make_prefix(int afi
, struct bgp_path_info
*pi
, struct prefix
*p
)
746 int is_bgp_static
= ((pi
->type
== ZEBRA_ROUTE_BGP
)
747 && (pi
->sub_type
== BGP_ROUTE_STATIC
))
750 struct bgp_dest
*net
= pi
->net
;
751 const struct prefix
*p_orig
= bgp_dest_get_prefix(net
);
754 if (p_orig
->family
== AF_FLOWSPEC
) {
757 return bgp_flowspec_get_first_nh(pi
->peer
->bgp
,
760 memset(p
, 0, sizeof(struct prefix
));
765 p
->u
.prefix4
= p_orig
->u
.prefix4
;
766 p
->prefixlen
= p_orig
->prefixlen
;
768 if (IS_MAPPED_IPV6(&pi
->attr
->mp_nexthop_global
)) {
769 ipv4_mapped_ipv6_to_ipv4(
770 &pi
->attr
->mp_nexthop_global
, &ipv4
);
772 p
->prefixlen
= IPV4_MAX_BITLEN
;
774 p
->u
.prefix4
= pi
->attr
->nexthop
;
775 p
->prefixlen
= IPV4_MAX_BITLEN
;
780 p
->family
= AF_INET6
;
783 p
->u
.prefix6
= p_orig
->u
.prefix6
;
784 p
->prefixlen
= p_orig
->prefixlen
;
786 /* If we receive MP_REACH nexthop with ::(LL)
787 * or LL(LL), use LL address as nexthop cache.
789 if (pi
->attr
->mp_nexthop_len
790 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
791 && (IN6_IS_ADDR_UNSPECIFIED(
792 &pi
->attr
->mp_nexthop_global
)
793 || IN6_IS_ADDR_LINKLOCAL(
794 &pi
->attr
->mp_nexthop_global
)))
795 p
->u
.prefix6
= pi
->attr
->mp_nexthop_local
;
797 p
->u
.prefix6
= pi
->attr
->mp_nexthop_global
;
798 p
->prefixlen
= IPV6_MAX_BITLEN
;
802 if (BGP_DEBUG(nht
, NHT
)) {
804 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
805 __func__
, afi
, AFI_IP
, AFI_IP6
);
813 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
816 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
817 * int command -- command to send to zebra
821 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache
*bnc
, int command
)
823 bool exact_match
= false;
829 /* Don't try to register if Zebra doesn't know of this instance. */
830 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc
->bgp
)) {
831 if (BGP_DEBUG(zebra
, ZEBRA
))
833 "%s: No zebra instance to talk to, not installing NHT entry",
838 if (!bgp_zebra_num_connects()) {
839 if (BGP_DEBUG(zebra
, ZEBRA
))
841 "%s: We have not connected yet, cannot send nexthops",
844 if ((command
== ZEBRA_NEXTHOP_REGISTER
845 || command
== ZEBRA_IMPORT_ROUTE_REGISTER
)
846 && (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)
847 || CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
)))
850 if (BGP_DEBUG(zebra
, ZEBRA
))
851 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__
,
852 zserv_command_string(command
), &bnc
->prefix
,
853 bnc
->bgp
->name_pretty
);
855 ret
= zclient_send_rnh(zclient
, command
, &bnc
->prefix
, exact_match
,
857 /* TBD: handle the failure */
858 if (ret
== ZCLIENT_SEND_FAILURE
)
859 flog_warn(EC_BGP_ZEBRA_SEND
,
860 "sendmsg_nexthop: zclient_send_message() failed");
862 if ((command
== ZEBRA_NEXTHOP_REGISTER
)
863 || (command
== ZEBRA_IMPORT_ROUTE_REGISTER
))
864 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
865 else if ((command
== ZEBRA_NEXTHOP_UNREGISTER
)
866 || (command
== ZEBRA_IMPORT_ROUTE_UNREGISTER
))
867 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
872 * register_zebra_rnh - register a NH/route with Zebra for notification
873 * when the route or the route to the nexthop changes.
875 * struct bgp_nexthop_cache *bnc
879 static void register_zebra_rnh(struct bgp_nexthop_cache
*bnc
,
880 int is_bgp_import_route
)
882 /* Check if we have already registered */
883 if (bnc
->flags
& BGP_NEXTHOP_REGISTERED
)
887 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
891 if (is_bgp_import_route
)
892 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_REGISTER
);
894 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_REGISTER
);
898 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
900 * struct bgp_nexthop_cache *bnc
904 static void unregister_zebra_rnh(struct bgp_nexthop_cache
*bnc
,
905 int is_bgp_import_route
)
907 /* Check if we have already registered */
908 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
912 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
916 if (is_bgp_import_route
)
917 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_UNREGISTER
);
919 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_UNREGISTER
);
923 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
925 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
929 void evaluate_paths(struct bgp_nexthop_cache
*bnc
)
931 struct bgp_dest
*dest
;
932 struct bgp_path_info
*path
;
934 struct peer
*peer
= (struct peer
*)bnc
->nht_info
;
935 struct bgp_table
*table
;
937 struct bgp
*bgp_path
;
938 const struct prefix
*p
;
940 if (BGP_DEBUG(nht
, NHT
)) {
941 char buf
[PREFIX2STR_BUFFER
];
942 char bnc_buf
[BNC_FLAG_DUMP_SIZE
];
943 char chg_buf
[BNC_FLAG_DUMP_SIZE
];
945 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
);
947 "NH update for %s(%u)(%s) - flags %s chgflags %s- evaluate paths",
948 buf
, bnc
->srte_color
, bnc
->bgp
->name_pretty
,
949 bgp_nexthop_dump_bnc_flags(bnc
, bnc_buf
,
951 bgp_nexthop_dump_bnc_change_flags(bnc
, chg_buf
,
955 LIST_FOREACH (path
, &(bnc
->paths
), nh_thread
) {
956 if (!(path
->type
== ZEBRA_ROUTE_BGP
957 && ((path
->sub_type
== BGP_ROUTE_NORMAL
)
958 || (path
->sub_type
== BGP_ROUTE_STATIC
)
959 || (path
->sub_type
== BGP_ROUTE_IMPORTED
))))
963 assert(dest
&& bgp_dest_table(dest
));
964 p
= bgp_dest_get_prefix(dest
);
965 afi
= family2afi(p
->family
);
966 table
= bgp_dest_table(dest
);
970 * handle routes from other VRFs (they can have a
971 * nexthop in THIS VRF). bgp_path is the bgp instance
972 * that owns the route referencing this nexthop.
974 bgp_path
= table
->bgp
;
977 * Path becomes valid/invalid depending on whether the nexthop
978 * reachable/unreachable.
980 * In case of unicast routes that were imported from vpn
981 * and that have labels, they are valid only if there are
982 * nexthops with labels
984 * If the nexthop is EVPN gateway-IP,
985 * do not check for a valid label.
988 bool bnc_is_valid_nexthop
= false;
989 bool path_valid
= false;
991 if (safi
== SAFI_UNICAST
&& path
->sub_type
== BGP_ROUTE_IMPORTED
992 && path
->extra
&& path
->extra
->num_labels
993 && (path
->attr
->evpn_overlay
.type
994 != OVERLAY_INDEX_GATEWAY_IP
)) {
995 bnc_is_valid_nexthop
=
996 bgp_isvalid_labeled_nexthop(bnc
) ? true : false;
998 if (bgp_update_martian_nexthop(
999 bnc
->bgp
, afi
, safi
, path
->type
,
1000 path
->sub_type
, path
->attr
, dest
)) {
1001 if (BGP_DEBUG(nht
, NHT
))
1003 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1004 __func__
, dest
, bgp_path
->name
);
1006 bnc_is_valid_nexthop
=
1007 bgp_isvalid_nexthop(bnc
) ? true : false;
1010 if (BGP_DEBUG(nht
, NHT
)) {
1011 char buf1
[RD_ADDRSTRLEN
];
1014 prefix_rd2str((struct prefix_rd
*)bgp_dest_get_prefix(dest
->pdest
),
1015 buf1
, sizeof(buf1
));
1017 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
1018 afi
, safi
, dest
, buf1
,
1019 bgp_path
->name_pretty
, path
->flags
);
1022 "... eval path %d/%d %pBD %s flags 0x%x",
1023 afi
, safi
, dest
, bgp_path
->name_pretty
,
1027 /* Skip paths marked for removal or as history. */
1028 if (CHECK_FLAG(path
->flags
, BGP_PATH_REMOVED
)
1029 || CHECK_FLAG(path
->flags
, BGP_PATH_HISTORY
))
1032 /* Copy the metric to the path. Will be used for bestpath
1034 if (bgp_isvalid_nexthop(bnc
) && bnc
->metric
)
1035 (bgp_path_info_extra_get(path
))->igpmetric
=
1037 else if (path
->extra
)
1038 path
->extra
->igpmetric
= 0;
1040 if (CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_METRIC_CHANGED
)
1041 || CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
)
1042 || path
->attr
->srte_color
!= 0)
1043 SET_FLAG(path
->flags
, BGP_PATH_IGP_CHANGED
);
1045 path_valid
= CHECK_FLAG(path
->flags
, BGP_PATH_VALID
);
1046 if (path_valid
!= bnc_is_valid_nexthop
) {
1048 /* No longer valid, clear flag; also for EVPN
1049 * routes, unimport from VRFs if needed.
1051 bgp_aggregate_decrement(bgp_path
, p
, path
, afi
,
1053 bgp_path_info_unset_flag(dest
, path
,
1055 if (safi
== SAFI_EVPN
&&
1056 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
1057 bgp_evpn_unimport_route(bgp_path
,
1058 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
1060 /* Path becomes valid, set flag; also for EVPN
1061 * routes, import from VRFs if needed.
1063 bgp_path_info_set_flag(dest
, path
,
1065 bgp_aggregate_increment(bgp_path
, p
, path
, afi
,
1067 if (safi
== SAFI_EVPN
&&
1068 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest
)))
1069 bgp_evpn_import_route(bgp_path
,
1070 afi
, safi
, bgp_dest_get_prefix(dest
), path
);
1074 bgp_process(bgp_path
, dest
, afi
, safi
);
1078 int valid_nexthops
= bgp_isvalid_nexthop(bnc
);
1080 if (valid_nexthops
) {
1082 * Peering cannot occur across a blackhole nexthop
1084 if (bnc
->nexthop_num
== 1 && bnc
->nexthop
1085 && bnc
->nexthop
->type
== NEXTHOP_TYPE_BLACKHOLE
) {
1086 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
1089 peer
->last_reset
= PEER_DOWN_WAITING_OPEN
;
1091 peer
->last_reset
= PEER_DOWN_WAITING_NHT
;
1093 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
)) {
1094 if (BGP_DEBUG(nht
, NHT
))
1096 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1097 __func__
, peer
->host
,
1098 peer
->bgp
->name_pretty
,
1100 bgp_fsm_nht_update(peer
, !!valid_nexthops
);
1101 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
1105 RESET_FLAG(bnc
->change_flags
);
1109 * path_nh_map - make or break path-to-nexthop association.
1111 * path - pointer to the path structure
1112 * bnc - pointer to the nexthop structure
1113 * make - if set, make the association. if unset, just break the existing
1116 void path_nh_map(struct bgp_path_info
*path
, struct bgp_nexthop_cache
*bnc
,
1119 if (path
->nexthop
) {
1120 LIST_REMOVE(path
, nh_thread
);
1121 path
->nexthop
->path_count
--;
1122 path
->nexthop
= NULL
;
1125 LIST_INSERT_HEAD(&(bnc
->paths
), path
, nh_thread
);
1126 path
->nexthop
= bnc
;
1127 path
->nexthop
->path_count
++;
1132 * This function is called to register nexthops to zebra
1133 * as that we may have tried to install the nexthops
1134 * before we actually have a zebra connection
1136 void bgp_nht_register_nexthops(struct bgp
*bgp
)
1138 for (afi_t afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
1139 struct bgp_nexthop_cache
*bnc
;
1141 frr_each (bgp_nexthop_cache
, &bgp
->nexthop_cache_table
[afi
],
1143 register_zebra_rnh(bnc
, 0);
1148 void bgp_nht_reg_enhe_cap_intfs(struct peer
*peer
)
1151 struct bgp_nexthop_cache
*bnc
;
1152 struct nexthop
*nhop
;
1153 struct interface
*ifp
;
1160 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
1161 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1162 __func__
, peer
->host
);
1166 if (p
.family
!= AF_INET6
)
1169 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0);
1173 if (peer
!= bnc
->nht_info
)
1176 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
1177 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
1182 zclient_send_interface_radv_req(zclient
,
1185 BGP_UNNUM_DEFAULT_RA_INTERVAL
);
1189 void bgp_nht_dereg_enhe_cap_intfs(struct peer
*peer
)
1192 struct bgp_nexthop_cache
*bnc
;
1193 struct nexthop
*nhop
;
1194 struct interface
*ifp
;
1202 if (!sockunion2hostprefix(&peer
->su
, &p
)) {
1203 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1204 __func__
, peer
->host
);
1208 if (p
.family
!= AF_INET6
)
1211 bnc
= bnc_find(&bgp
->nexthop_cache_table
[AFI_IP6
], &p
, 0);
1215 if (peer
!= bnc
->nht_info
)
1218 for (nhop
= bnc
->nexthop
; nhop
; nhop
= nhop
->next
) {
1219 ifp
= if_lookup_by_index(nhop
->ifindex
, nhop
->vrf_id
);
1224 zclient_send_interface_radv_req(zclient
, nhop
->vrf_id
, ifp
, 0,
1229 /****************************************************************************
1230 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1231 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1232 * left to the application using it.
1233 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1234 * failover of remote ES links.
1235 ***************************************************************************/
1236 static bitfield_t bgp_nh_id_bitmap
;
1237 static uint32_t bgp_l3nhg_start
;
1239 /* XXX - currently we do nothing on the callbacks */
/* Nexthop-group "group added" callback; intentionally a no-op. */
static void bgp_l3nhg_add_cb(const char *name)
{
	(void)name;
}
/* Nexthop-group "nexthop added" callback; intentionally a no-op. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	(void)nhgc;
	(void)nhop;
}
/* Nexthop-group "nexthop removed" callback; intentionally a no-op. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	(void)nhgc;
	(void)nhop;
}
/* Nexthop-group "group removed" callback; intentionally a no-op. */
static void bgp_l3nhg_del_cb(const char *name)
{
	(void)name;
}
1255 static void bgp_l3nhg_zebra_init(void)
1257 static bool bgp_l3nhg_zebra_inited
;
1258 if (bgp_l3nhg_zebra_inited
)
1261 bgp_l3nhg_zebra_inited
= true;
1262 bgp_l3nhg_start
= zclient_get_nhg_start(ZEBRA_ROUTE_BGP
);
1263 nexthop_group_init(bgp_l3nhg_add_cb
, bgp_l3nhg_add_nexthop_cb
,
1264 bgp_l3nhg_del_nexthop_cb
, bgp_l3nhg_del_cb
);
1268 #define min(A, B) ((A) < (B) ? (A) : (B))
1269 void bgp_l3nhg_init(void)
1273 id_max
= min(ZEBRA_NHG_PROTO_SPACING
- 1, 16 * 1024);
1274 bf_init(bgp_nh_id_bitmap
, id_max
);
1275 bf_assign_zero_index(bgp_nh_id_bitmap
);
1277 if (BGP_DEBUG(nht
, NHT
) || BGP_DEBUG(evpn_mh
, EVPN_MH_ES
))
1278 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start
+ 1,
1279 bgp_l3nhg_start
+ id_max
);
1282 void bgp_l3nhg_finish(void)
1284 bf_free(bgp_nh_id_bitmap
);
1287 uint32_t bgp_l3nhg_id_alloc(void)
1289 uint32_t nhg_id
= 0;
1291 bgp_l3nhg_zebra_init();
1292 bf_assign_index(bgp_nh_id_bitmap
, nhg_id
);
1294 nhg_id
+= bgp_l3nhg_start
;
1299 void bgp_l3nhg_id_free(uint32_t nhg_id
)
1301 if (!nhg_id
|| (nhg_id
<= bgp_l3nhg_start
))
1304 nhg_id
-= bgp_l3nhg_start
;
1306 bf_release_index(bgp_nh_id_bitmap
, nhg_id
);