]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_nht.c
Merge pull request #12798 from donaldsharp/rib_match_multicast
[mirror_frr.git] / bgpd / bgp_nht.c
CommitLineData
acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
fb018d25
DS
2/* BGP Nexthop tracking
3 * Copyright (C) 2013 Cumulus Networks, Inc.
fb018d25
DS
4 */
5
6#include <zebra.h>
7
8#include "command.h"
9#include "thread.h"
10#include "prefix.h"
11#include "zclient.h"
12#include "stream.h"
13#include "network.h"
14#include "log.h"
15#include "memory.h"
16#include "nexthop.h"
7076bb2f 17#include "vrf.h"
039f3a34 18#include "filter.h"
8bcb09a1 19#include "nexthop_group.h"
fb018d25
DS
20
21#include "bgpd/bgpd.h"
22#include "bgpd/bgp_table.h"
23#include "bgpd/bgp_route.h"
24#include "bgpd/bgp_attr.h"
25#include "bgpd/bgp_nexthop.h"
26#include "bgpd/bgp_debug.h"
14454c9f 27#include "bgpd/bgp_errors.h"
fb018d25 28#include "bgpd/bgp_nht.h"
ffd0c037 29#include "bgpd/bgp_fsm.h"
afbb1c59 30#include "bgpd/bgp_zebra.h"
0378bcaa 31#include "bgpd/bgp_flowspec_util.h"
7c312383 32#include "bgpd/bgp_evpn.h"
9e15d76a 33#include "bgpd/bgp_rd.h"
fb018d25
DS
34
35extern struct zclient *zclient;
fb018d25 36
23f60ffd
DA
37static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
38static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
40381db7 39static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
cc9f21da 40static void bgp_nht_ifp_initial(struct thread *thread);
fb018d25 41
d62a17ae 42static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
d4d9d757 43{
d62a17ae 44 return (bgp_zebra_num_connects() == 0
c3b95419
EDP
45 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
46 && bnc->nexthop_num > 0));
d4d9d757
LB
47}
48
4cd690ae
PG
49static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
50 struct bgp_path_info *path)
51{
52 struct interface *ifp = NULL;
53 struct nexthop *nexthop;
54 struct bgp_interface *iifp;
55 struct peer *peer;
56
57 if (!path->extra || !path->extra->peer_orig)
58 return false;
59
60 peer = path->extra->peer_orig;
61
62 /* only connected ebgp peers are valid */
63 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
64 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
65 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
66 return false;
67
68 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
69 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
70 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
71 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
72 ifp = if_lookup_by_index(
73 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
74 bnc->bgp->vrf_id);
75 }
76 if (!ifp)
77 continue;
78 iifp = ifp->info;
79 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
80 return true;
81 }
82 return false;
83}
84
1bb550b6
PG
85static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
86 struct bgp_path_info *path)
87{
88 struct interface *ifp = NULL;
89 struct nexthop *nexthop;
90
91 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
92 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
93 ifp = if_lookup_by_index(
94 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
95 bnc->bgp->vrf_id);
96 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
97 ifp->ll_type == ZEBRA_LLT_IP6GRE))
98 break;
99 }
100 }
101 if (!ifp)
102 return false;
103
104 if (CHECK_FLAG(path->attr->rmap_change_flags,
105 BATTR_RMAP_L3VPN_ACCEPT_GRE))
106 return true;
107
108 return false;
109}
110
111static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
112 struct bgp_path_info *path)
960035b2 113{
7f8c7d91 114 /*
1bb550b6 115 * - In the case of MPLS-VPN, the label is learned from LDP or other
7f8c7d91
HS
116 * protocols, and nexthop tracking is enabled for the label.
117 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
1bb550b6 118 * - In the case of SRv6-VPN, we need to track the reachability to the
7f8c7d91
HS
119 * SID (in other words, IPv6 address). As in MPLS, we need to record
120 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
121 * currently not implemented, and this function assumes that all
122 * Transit routes for SRv6-VPN are valid.
1bb550b6 123 * - Otherwise check for mpls-gre acceptance
7f8c7d91 124 */
1bb550b6
PG
125 return (bgp_zebra_num_connects() == 0 ||
126 (bnc && (bnc->nexthop_num > 0 &&
46dbf9d0
DA
127 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
128 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
1bb550b6 129 bnc->bgp->srv6_enabled ||
4cd690ae 130 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
1bb550b6 131 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
960035b2
PZ
132}
133
d62a17ae 134static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
fb018d25 135{
d5c4bac9 136 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
036f482f
DA
137 if (BGP_DEBUG(nht, NHT))
138 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
139 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 140 bnc->bgp->name_pretty);
e37e1e27
PR
141 /* only unregister if this is the last nh for this prefix*/
142 if (!bnc_existing_for_prefix(bnc))
23f60ffd 143 unregister_zebra_rnh(bnc);
d62a17ae 144 bnc_free(bnc);
fb018d25 145 }
fb018d25
DS
146}
147
4b7e6066 148void bgp_unlink_nexthop(struct bgp_path_info *path)
f9164b1d 149{
d62a17ae 150 struct bgp_nexthop_cache *bnc = path->nexthop;
151
152 if (!bnc)
153 return;
f9164b1d 154
7f040da1 155 path_nh_map(path, NULL, false);
f9164b1d 156
d62a17ae 157 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
158}
159
996319e6
DS
160void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
161{
162 struct prefix pp;
163 struct prefix pt;
164 struct bgp_nexthop_cache *bncp, *bnct;
165 afi_t afi;
35aae5c9 166 ifindex_t ifindex = 0;
996319e6
DS
167
168 if (!sockunion2hostprefix(&from->su, &pp))
169 return;
170
35aae5c9
DS
171 /*
172 * Gather the ifindex for if up/down events to be
173 * tagged into this fun
174 */
175 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
176 ifindex = from->su.sin6.sin6_scope_id;
177
996319e6 178 afi = family2afi(pp.family);
35aae5c9 179 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
996319e6
DS
180
181 if (!sockunion2hostprefix(&to->su, &pt))
182 return;
183
35aae5c9
DS
184 /*
185 * Gather the ifindex for if up/down events to be
186 * tagged into this fun
187 */
188 ifindex = 0;
189 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
190 ifindex = to->su.sin6.sin6_scope_id;
191 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
996319e6
DS
192
193 if (bnct != bncp)
194 return;
195
196 if (bnct)
197 bnct->nht_info = to;
198}
199
51f3216b
PJD
200/*
201 * Returns the bnc whose bnc->nht_info matches the LL peer by
202 * looping through the IPv6 nexthop table
203 */
204static struct bgp_nexthop_cache *
205bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
206{
207 struct bgp_nexthop_cache *bnc;
208
209 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
210 bnc) {
211 if (bnc->nht_info == peer) {
212 if (BGP_DEBUG(nht, NHT)) {
213 zlog_debug(
214 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
215 &bnc->prefix, bnc->ifindex,
216 bnc->srte_color, bnc, peer->host,
217 peer->bgp->name_pretty, peer);
218 }
219 return bnc;
220 }
221 }
222
223 if (BGP_DEBUG(nht, NHT))
224 zlog_debug(
225 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
226 peer->host, peer->bgp->name_pretty, peer);
227
228 return NULL;
229}
230
d62a17ae 231void bgp_unlink_nexthop_by_peer(struct peer *peer)
f9164b1d 232{
d62a17ae 233 struct prefix p;
d62a17ae 234 struct bgp_nexthop_cache *bnc;
235 afi_t afi = family2afi(peer->su.sa.sa_family);
35aae5c9 236 ifindex_t ifindex = 0;
d62a17ae 237
51f3216b
PJD
238 if (!sockunion2hostprefix(&peer->su, &p)) {
239 /*
240 * In scenarios where unnumbered BGP session is brought
241 * down by shutting down the interface before unconfiguring
242 * the BGP neighbor, neighbor information in peer->su.sa
243 * will be cleared when the interface is shutdown. So
244 * during the deletion of unnumbered bgp peer, above check
245 * will return true. Therefore, in this case,BGP needs to
246 * find the bnc whose bnc->nht_info matches the
247 * peer being deleted and free it.
248 */
249 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
250 } else {
251 /*
252 * Gather the ifindex for if up/down events to be
253 * tagged into this fun
254 */
255 if (afi == AFI_IP6 &&
256 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
257 ifindex = peer->su.sin6.sin6_scope_id;
258 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
259 ifindex);
260 }
261
14315f2d 262 if (!bnc)
d62a17ae 263 return;
264
d62a17ae 265 /* cleanup the peer reference */
266 bnc->nht_info = NULL;
267
268 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
269}
270
960035b2
PZ
271/*
272 * A route and its nexthop might belong to different VRFs. Therefore,
273 * we need both the bgp_route and bgp_nexthop pointers.
274 */
275int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
4053e952 276 afi_t afi, safi_t safi, struct bgp_path_info *pi,
654a5978
PG
277 struct peer *peer, int connected,
278 const struct prefix *orig_prefix)
fb018d25 279{
f663c581 280 struct bgp_nexthop_cache_head *tree = NULL;
d62a17ae 281 struct bgp_nexthop_cache *bnc;
ac2f64d3 282 struct bgp_path_info *bpi_ultimate;
d62a17ae 283 struct prefix p;
545aeef1 284 uint32_t srte_color = 0;
d62a17ae 285 int is_bgp_static_route = 0;
8761cd6d 286 ifindex_t ifindex = 0;
d62a17ae 287
40381db7
DS
288 if (pi) {
289 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
290 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 291 ? 1
292 : 0;
293
294 /* Since Extended Next-hop Encoding (RFC5549) support, we want
295 to derive
296 address-family from the next-hop. */
297 if (!is_bgp_static_route)
7226bc40
TA
298 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
299 : AFI_IP;
d62a17ae 300
92d6f769
K
301 /* Validation for the ipv4 mapped ipv6 nexthop. */
302 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
303 afi = AFI_IP;
304 }
305
2951a7a4 306 /* This will return true if the global IPv6 NH is a link local
d62a17ae 307 * addr */
40381db7 308 if (make_prefix(afi, pi, &p) < 0)
d62a17ae 309 return 1;
545aeef1 310
654a5978
PG
311 if (!is_bgp_static_route && orig_prefix
312 && prefix_same(&p, orig_prefix)) {
313 if (BGP_DEBUG(nht, NHT)) {
314 zlog_debug(
315 "%s(%pFX): prefix loops through itself",
316 __func__, &p);
317 }
318 return 0;
319 }
320
545aeef1 321 srte_color = pi->attr->srte_color;
d62a17ae 322 } else if (peer) {
8761cd6d
DS
323 /*
324 * Gather the ifindex for if up/down events to be
325 * tagged into this fun
326 */
35aae5c9
DS
327 if (afi == AFI_IP6 &&
328 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
8761cd6d 329 ifindex = peer->su.sin6.sin6_scope_id;
35aae5c9
DS
330 if (ifindex == 0) {
331 if (BGP_DEBUG(nht, NHT)) {
332 zlog_debug(
333 "%s: Unable to locate ifindex, waiting till we have one",
334 peer->conf_if);
335 }
336 return 0;
337 }
338 }
8761cd6d 339
d62a17ae 340 if (!sockunion2hostprefix(&peer->su, &p)) {
341 if (BGP_DEBUG(nht, NHT)) {
342 zlog_debug(
343 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
15569c58 344 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 345 }
346 return 0;
347 }
348 } else
349 return 0;
350
351 if (is_bgp_static_route)
f663c581 352 tree = &bgp_nexthop->import_check_table[afi];
d62a17ae 353 else
f663c581 354 tree = &bgp_nexthop->nexthop_cache_table[afi];
d62a17ae 355
35aae5c9 356 bnc = bnc_find(tree, &p, srte_color, ifindex);
14315f2d 357 if (!bnc) {
35aae5c9 358 bnc = bnc_new(tree, &p, srte_color, ifindex);
960035b2 359 bnc->bgp = bgp_nexthop;
036f482f
DA
360 if (BGP_DEBUG(nht, NHT))
361 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
362 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 363 bnc->bgp->name_pretty, peer);
4115b296 364 } else {
036f482f 365 if (BGP_DEBUG(nht, NHT))
4115b296 366 zlog_debug(
036f482f
DA
367 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
368 &bnc->prefix, bnc->ifindex,
369 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
370 bnc->path_count, bnc->nht_info);
fc9a856f 371 }
d62a17ae 372
021b6596
AD
373 if (pi && is_route_parent_evpn(pi))
374 bnc->is_evpn_gwip_nexthop = true;
375
2bb8b49c 376 if (is_bgp_static_route) {
d62a17ae 377 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
378
379 /* If we're toggling the type, re-register */
892fedb6 380 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
d62a17ae 381 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
382 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
383 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
384 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
892fedb6
DA
385 } else if ((!CHECK_FLAG(bgp_route->flags,
386 BGP_FLAG_IMPORT_CHECK))
d62a17ae 387 && CHECK_FLAG(bnc->flags,
388 BGP_STATIC_ROUTE_EXACT_MATCH)) {
389 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
390 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
391 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
392 }
078430f6 393 }
d62a17ae 394 /* When nexthop is already known, but now requires 'connected'
395 * resolution,
396 * re-register it. The reverse scenario where the nexthop currently
397 * requires
398 * 'connected' resolution does not need a re-register (i.e., we treat
399 * 'connected-required' as an override) except in the scenario where
400 * this
401 * is actually a case of tracking a peer for connectivity (e.g., after
402 * disable connected-check).
403 * NOTE: We don't track the number of paths separately for 'connected-
404 * required' vs 'connected-not-required' as this change is not a common
405 * scenario.
406 */
407 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
408 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
409 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
410 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
2bb8b49c
DS
411 } else if (peer && !connected
412 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
d62a17ae 413 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
414 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
415 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
078430f6 416 }
4115b296 417 if (peer && (bnc->ifindex != ifindex)) {
418 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
419 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
420 bnc->ifindex = ifindex;
421 }
960035b2 422 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
1ee0a2df
DS
423 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
424 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
b54892e0 425 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
f663c581 426 && !is_default_host_route(&bnc->prefix))
23f60ffd 427 register_zebra_rnh(bnc);
1eb6c3ea 428
40381db7 429 if (pi && pi->nexthop != bnc) {
d62a17ae 430 /* Unlink from existing nexthop cache, if any. This will also
431 * free
432 * the nexthop cache entry, if appropriate.
433 */
40381db7 434 bgp_unlink_nexthop(pi);
d62a17ae 435
7f040da1
DS
436 /* updates NHT pi list reference */
437 path_nh_map(pi, bnc, true);
d62a17ae 438
ac2f64d3 439 bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
d62a17ae 440 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
ac2f64d3
LS
441 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
442 bnc->metric;
443 else if (bpi_ultimate->extra)
444 bpi_ultimate->extra->igpmetric = 0;
996319e6
DS
445 } else if (peer) {
446 /*
4667220e 447 * Let's not accidentally save the peer data for a peer
996319e6
DS
448 * we are going to throw away in a second or so.
449 * When we come back around we'll fix up this
450 * data properly in replace_nexthop_by_peer
451 */
452 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
453 bnc->nht_info = (void *)peer; /* NHT peer reference */
454 }
d62a17ae 455
456 /*
457 * We are cheating here. Views have no associated underlying
458 * ability to detect nexthops. So when we have a view
459 * just tell everyone the nexthop is valid
460 */
960035b2 461 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
d62a17ae 462 return 1;
1bb550b6
PG
463 else if (safi == SAFI_UNICAST && pi &&
464 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
465 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
466 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
467 else
d62a17ae 468 return (bgp_isvalid_nexthop(bnc));
fb018d25
DS
469}
470
d62a17ae 471void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
9a233a02 472{
d62a17ae 473 struct bgp_nexthop_cache *bnc;
474 struct prefix p;
35aae5c9 475 ifindex_t ifindex = 0;
d62a17ae 476
477 if (!peer)
478 return;
479
51f3216b
PJD
480 /*
481 * In case the below check evaluates true and if
482 * the bnc has not been freed at this point, then
483 * we might have to do something similar to what's
484 * done in bgp_unlink_nexthop_by_peer(). Since
485 * bgp_unlink_nexthop_by_peer() loops through the
486 * nodes of V6 nexthop cache to find the bnc, it is
487 * currently not being called here.
488 */
d62a17ae 489 if (!sockunion2hostprefix(&peer->su, &p))
490 return;
35aae5c9
DS
491 /*
492 * Gather the ifindex for if up/down events to be
493 * tagged into this fun
494 */
495 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
496 ifindex = peer->su.sin6.sin6_scope_id;
f663c581 497 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
35aae5c9 498 &p, 0, ifindex);
14315f2d
DS
499 if (!bnc) {
500 if (BGP_DEBUG(nht, NHT))
8c1a4c10 501 zlog_debug(
f663c581 502 "Cannot find connected NHT node for peer %s(%s)",
8c1a4c10 503 peer->host, peer->bgp->name_pretty);
14315f2d
DS
504 return;
505 }
d62a17ae 506
507 if (bnc->nht_info != peer) {
508 if (BGP_DEBUG(nht, NHT))
509 zlog_debug(
8c1a4c10
DS
510 "Connected NHT %p node for peer %s(%s) points to %p",
511 bnc, peer->host, bnc->bgp->name_pretty,
512 bnc->nht_info);
d62a17ae 513 return;
514 }
515
516 bnc->nht_info = NULL;
517
518 if (LIST_EMPTY(&(bnc->paths))) {
519 if (BGP_DEBUG(nht, NHT))
8c1a4c10
DS
520 zlog_debug(
521 "Freeing connected NHT node %p for peer %s(%s)",
522 bnc, peer->host, bnc->bgp->name_pretty);
23f60ffd 523 unregister_zebra_rnh(bnc);
d62a17ae 524 bnc_free(bnc);
525 }
9a233a02
DS
526}
527
545aeef1 528static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
9f002fa5
DS
529 struct zapi_route *nhr,
530 bool import_check)
fb018d25 531{
d62a17ae 532 struct nexthop *nexthop;
533 struct nexthop *oldnh;
534 struct nexthop *nhlist_head = NULL;
535 struct nexthop *nhlist_tail = NULL;
d62a17ae 536 int i;
021b6596 537 bool evpn_resolved = false;
14315f2d 538
083ec940 539 bnc->last_update = monotime(NULL);
d62a17ae 540 bnc->change_flags = 0;
d62a17ae 541
542 /* debug print the input */
987a720a
DS
543 if (BGP_DEBUG(nht, NHT)) {
544 char bnc_buf[BNC_FLAG_DUMP_SIZE];
545
d62a17ae 546 zlog_debug(
35aae5c9 547 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
2dbe669b 548 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
35aae5c9 549 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
987a720a
DS
550 nhr->nexthop_num, bnc->nexthop_num,
551 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
552 sizeof(bnc_buf)));
553 }
d62a17ae 554
545aeef1 555 if (nhr->metric != bnc->metric)
d62a17ae 556 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
557
545aeef1 558 if (nhr->nexthop_num != bnc->nexthop_num)
d62a17ae 559 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
560
9f002fa5
DS
561 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
562 !prefix_same(&bnc->prefix, &nhr->prefix))) {
563 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
564 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
565 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
566 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
567
568 bnc_nexthop_free(bnc);
569 bnc->nexthop = NULL;
570
571 if (BGP_DEBUG(nht, NHT))
572 zlog_debug(
573 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
574 __func__, &bnc->prefix, &nhr->prefix);
575 } else if (nhr->nexthop_num) {
6137a77d
DS
576 struct peer *peer = bnc->nht_info;
577
d62a17ae 578 /* notify bgp fsm if nbr ip goes from invalid->valid */
579 if (!bnc->nexthop_num)
580 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
581
021b6596
AD
582 if (!bnc->is_evpn_gwip_nexthop)
583 bnc->flags |= BGP_NEXTHOP_VALID;
545aeef1
RW
584 bnc->metric = nhr->metric;
585 bnc->nexthop_num = nhr->nexthop_num;
4a749e2c 586
960035b2
PZ
587 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
588
545aeef1 589 for (i = 0; i < nhr->nexthop_num; i++) {
960035b2
PZ
590 int num_labels = 0;
591
545aeef1 592 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
d62a17ae 593
6137a77d
DS
594 /*
595 * Turn on RA for the v6 nexthops
596 * we receive from bgp. This is to allow us
597 * to work with v4 routing over v6 nexthops
598 */
687a2b5d
DS
599 if (peer && !peer->ifp
600 && CHECK_FLAG(peer->flags,
601 PEER_FLAG_CAPABILITY_ENHE)
545aeef1 602 && nhr->prefix.family == AF_INET6
65f803e8 603 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
6137a77d
DS
604 struct interface *ifp;
605
606 ifp = if_lookup_by_index(nexthop->ifindex,
607 nexthop->vrf_id);
8c9769e0
DS
608 if (ifp)
609 zclient_send_interface_radv_req(
610 zclient, nexthop->vrf_id, ifp,
611 true,
612 BGP_UNNUM_DEFAULT_RA_INTERVAL);
6137a77d 613 }
960035b2
PZ
614 /* There is at least one label-switched path */
615 if (nexthop->nh_label &&
616 nexthop->nh_label->num_labels) {
617
618 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
619 num_labels = nexthop->nh_label->num_labels;
620 }
621
d62a17ae 622 if (BGP_DEBUG(nht, NHT)) {
623 char buf[NEXTHOP_STRLEN];
624 zlog_debug(
960035b2
PZ
625 " nhop via %s (%d labels)",
626 nexthop2str(nexthop, buf, sizeof(buf)),
627 num_labels);
d62a17ae 628 }
629
630 if (nhlist_tail) {
631 nhlist_tail->next = nexthop;
632 nhlist_tail = nexthop;
633 } else {
634 nhlist_tail = nexthop;
635 nhlist_head = nexthop;
636 }
637
638 /* No need to evaluate the nexthop if we have already
639 * determined
640 * that there has been a change.
641 */
642 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
643 continue;
644
645 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
78fba41b 646 if (nexthop_same(oldnh, nexthop))
d62a17ae 647 break;
648
649 if (!oldnh)
650 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
651 }
652 bnc_nexthop_free(bnc);
653 bnc->nexthop = nhlist_head;
021b6596
AD
654
655 /*
656 * Gateway IP nexthop is L3 reachable. Mark it as
657 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
658 * remote EVPN RT-2.
659 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
660 * When its mapping with EVPN RT-2 is established, unset
661 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
662 */
663 if (bnc->is_evpn_gwip_nexthop) {
664 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
665
511211bf 666 if (BGP_DEBUG(nht, NHT))
021b6596 667 zlog_debug(
511211bf
DA
668 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
669 &bnc->prefix,
021b6596
AD
670 (evpn_resolved ? "successful"
671 : "failed"));
021b6596
AD
672
673 if (evpn_resolved) {
674 bnc->flags |= BGP_NEXTHOP_VALID;
675 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
676 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
677 } else {
678 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
679 bnc->flags &= ~BGP_NEXTHOP_VALID;
680 }
681 }
d62a17ae 682 } else {
021b6596 683 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
d62a17ae 684 bnc->flags &= ~BGP_NEXTHOP_VALID;
c3b95419 685 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
545aeef1 686 bnc->nexthop_num = nhr->nexthop_num;
d62a17ae 687
688 /* notify bgp fsm if nbr ip goes from valid->invalid */
689 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
690
691 bnc_nexthop_free(bnc);
692 bnc->nexthop = NULL;
693 }
694
695 evaluate_paths(bnc);
fb018d25
DS
696}
697
8761cd6d
DS
698static void bgp_nht_ifp_table_handle(struct bgp *bgp,
699 struct bgp_nexthop_cache_head *table,
700 struct interface *ifp, bool up)
701{
702 struct bgp_nexthop_cache *bnc;
703
704 frr_each (bgp_nexthop_cache, table, bnc) {
705 if (bnc->ifindex != ifp->ifindex)
706 continue;
707
083ec940 708 bnc->last_update = monotime(NULL);
8761cd6d
DS
709 bnc->change_flags = 0;
710
474cfe4a
DS
711 /*
712 * For interface based routes ( ala the v6 LL routes
713 * that this was written for ) the metric received
714 * for the connected route is 0 not 1.
715 */
716 bnc->metric = 0;
8761cd6d
DS
717 if (up) {
718 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
719 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
8761cd6d
DS
720 bnc->nexthop_num = 1;
721 } else {
722 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
723 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
724 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
725 bnc->nexthop_num = 0;
8761cd6d
DS
726 }
727
728 evaluate_paths(bnc);
729 }
730}
731static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
732{
733 struct bgp *bgp;
734
096f7609 735 bgp = ifp->vrf->info;
8761cd6d
DS
736 if (!bgp)
737 return;
738
7f2e9cce
DS
739 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
740 up);
741 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
742 up);
8761cd6d
DS
743 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
744 up);
745 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
746 up);
747}
748
/* Tell nexthop tracking that an interface came up. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool is_up = true;

	bgp_nht_ifp_handle(ifp, is_up);
}
753
/* Tell nexthop tracking that an interface went down. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool is_up = false;

	bgp_nht_ifp_handle(ifp, is_up);
}
758
cc9f21da 759static void bgp_nht_ifp_initial(struct thread *thread)
8761cd6d
DS
760{
761 ifindex_t ifindex = THREAD_VAL(thread);
0b52b75a
IR
762 struct bgp *bgp = THREAD_ARG(thread);
763 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
8761cd6d
DS
764
765 if (!ifp)
cc9f21da 766 return;
8761cd6d 767
4115b296 768 if (BGP_DEBUG(nht, NHT))
769 zlog_debug(
770 "Handle NHT initial update for Intf %s(%d) status %s",
771 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
772
8761cd6d
DS
773 if (if_is_up(ifp))
774 bgp_nht_ifp_up(ifp);
775 else
776 bgp_nht_ifp_down(ifp);
8761cd6d
DS
777}
778
779/*
780 * So the bnc code has the ability to handle interface up/down
781 * events to properly handle v6 LL peering.
782 * What is happening here:
783 * The event system for peering expects the nht code to
784 * report on the tracking events after we move to active
785 * So let's give the system a chance to report on that event
786 * in a manner that is expected.
787 */
788void bgp_nht_interface_events(struct peer *peer)
789{
790 struct bgp *bgp = peer->bgp;
791 struct bgp_nexthop_cache_head *table;
792 struct bgp_nexthop_cache *bnc;
793 struct prefix p;
35aae5c9 794 ifindex_t ifindex = 0;
8761cd6d
DS
795
796 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
797 return;
798
799 if (!sockunion2hostprefix(&peer->su, &p))
800 return;
35aae5c9
DS
801 /*
802 * Gather the ifindex for if up/down events to be
803 * tagged into this fun
804 */
805 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
806 ifindex = peer->su.sin6.sin6_scope_id;
8761cd6d
DS
807
808 table = &bgp->nexthop_cache_table[AFI_IP6];
35aae5c9 809 bnc = bnc_find(table, &p, 0, ifindex);
8761cd6d
DS
810 if (!bnc)
811 return;
812
813 if (bnc->ifindex)
0b52b75a 814 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
8761cd6d
DS
815 bnc->ifindex, NULL);
816}
817
545aeef1
RW
818void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
819{
820 struct bgp_nexthop_cache_head *tree = NULL;
b8210849 821 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
545aeef1 822 struct bgp *bgp;
06e4e901 823 struct prefix match;
545aeef1
RW
824 struct zapi_route nhr;
825 afi_t afi;
826
827 bgp = bgp_lookup_by_vrf_id(vrf_id);
828 if (!bgp) {
829 flog_err(
830 EC_BGP_NH_UPD,
831 "parse nexthop update: instance not found for vrf_id %u",
832 vrf_id);
833 return;
834 }
835
06e4e901 836 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
cc42c4f0
DS
837 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
838 bgp->name_pretty);
545aeef1
RW
839 return;
840 }
841
06e4e901 842 afi = family2afi(match.family);
b8210849 843 tree = &bgp->nexthop_cache_table[afi];
545aeef1 844
35aae5c9 845 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
2bb8b49c
DS
846 if (!bnc_nhc) {
847 if (BGP_DEBUG(nht, NHT))
848 zlog_debug(
849 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
850 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
851 } else
9f002fa5 852 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
b8210849
DS
853
854 tree = &bgp->import_check_table[afi];
855
35aae5c9 856 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
2bb8b49c
DS
857 if (!bnc_import) {
858 if (BGP_DEBUG(nht, NHT))
859 zlog_debug(
860 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
861 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
862 } else
d00a5f6b 863 bgp_process_nexthop_update(bnc_import, &nhr, true);
1e24860b 864
545aeef1
RW
865 /*
866 * HACK: if any BGP route is dependant on an SR-policy that doesn't
867 * exist, zebra will never send NH updates relative to that policy. In
868 * that case, whenever we receive an update about a colorless NH, update
869 * the corresponding colorful NHs that share the same endpoint but that
870 * are inactive. This ugly hack should work around the problem at the
871 * cost of a performance pernalty. Long term, what should be done is to
872 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
873 * which should provide a better infrastructure to solve this issue in
874 * a more efficient and elegant way.
875 */
b8210849 876 if (nhr.srte_color == 0 && bnc_nhc) {
545aeef1
RW
877 struct bgp_nexthop_cache *bnc_iter;
878
879 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
880 bnc_iter) {
d00a5f6b
DS
881 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
882 bnc_iter->srte_color == 0 ||
883 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
545aeef1
RW
884 continue;
885
9f002fa5 886 bgp_process_nexthop_update(bnc_iter, &nhr, false);
545aeef1
RW
887 }
888 }
889}
890
ee7ca6c0 891/*
892 * Cleanup nexthop registration and status information for BGP nexthops
893 * pertaining to this VRF. This is invoked upon VRF deletion.
894 */
895void bgp_cleanup_nexthops(struct bgp *bgp)
896{
f663c581
RW
897 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
898 struct bgp_nexthop_cache *bnc;
ee7ca6c0 899
f663c581
RW
900 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
901 bnc) {
ee7ca6c0 902 /* Clear relevant flags. */
903 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
904 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
905 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
021b6596 906 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
ee7ca6c0 907 }
908 }
909}
910
fb018d25
DS
911/**
912 * make_prefix - make a prefix structure from the path (essentially
913 * path's node.
914 */
40381db7 915static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
fb018d25 916{
078430f6 917
40381db7
DS
918 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
919 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 920 ? 1
921 : 0;
9bcb3eef
DS
922 struct bgp_dest *net = pi->net;
923 const struct prefix *p_orig = bgp_dest_get_prefix(net);
92d6f769 924 struct in_addr ipv4;
0378bcaa
PG
925
926 if (p_orig->family == AF_FLOWSPEC) {
927 if (!pi->peer)
928 return -1;
929 return bgp_flowspec_get_first_nh(pi->peer->bgp,
1840384b 930 pi, p, afi);
0378bcaa 931 }
d62a17ae 932 memset(p, 0, sizeof(struct prefix));
933 switch (afi) {
934 case AFI_IP:
935 p->family = AF_INET;
936 if (is_bgp_static) {
b54892e0
DS
937 p->u.prefix4 = p_orig->u.prefix4;
938 p->prefixlen = p_orig->prefixlen;
d62a17ae 939 } else {
92d6f769
K
940 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
941 ipv4_mapped_ipv6_to_ipv4(
942 &pi->attr->mp_nexthop_global, &ipv4);
943 p->u.prefix4 = ipv4;
944 p->prefixlen = IPV4_MAX_BITLEN;
945 } else {
7226bc40
TA
946 if (p_orig->family == AF_EVPN)
947 p->u.prefix4 =
948 pi->attr->mp_nexthop_global_in;
949 else
950 p->u.prefix4 = pi->attr->nexthop;
92d6f769
K
951 p->prefixlen = IPV4_MAX_BITLEN;
952 }
d62a17ae 953 }
954 break;
955 case AFI_IP6:
d62a17ae 956 p->family = AF_INET6;
957
958 if (is_bgp_static) {
b54892e0
DS
959 p->u.prefix6 = p_orig->u.prefix6;
960 p->prefixlen = p_orig->prefixlen;
d62a17ae 961 } else {
606fdbb1
DA
962 /* If we receive MP_REACH nexthop with ::(LL)
963 * or LL(LL), use LL address as nexthop cache.
964 */
965 if (pi->attr->mp_nexthop_len
966 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
967 && (IN6_IS_ADDR_UNSPECIFIED(
968 &pi->attr->mp_nexthop_global)
969 || IN6_IS_ADDR_LINKLOCAL(
970 &pi->attr->mp_nexthop_global)))
971 p->u.prefix6 = pi->attr->mp_nexthop_local;
17ef5a93
PG
972 /* If we receive MR_REACH with (GA)::(LL)
973 * then check for route-map to choose GA or LL
974 */
975 else if (pi->attr->mp_nexthop_len
976 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
2bb8b49c 977 if (pi->attr->mp_nexthop_prefer_global)
17ef5a93
PG
978 p->u.prefix6 =
979 pi->attr->mp_nexthop_global;
980 else
981 p->u.prefix6 =
982 pi->attr->mp_nexthop_local;
983 } else
606fdbb1 984 p->u.prefix6 = pi->attr->mp_nexthop_global;
d62a17ae 985 p->prefixlen = IPV6_MAX_BITLEN;
986 }
987 break;
988 default:
989 if (BGP_DEBUG(nht, NHT)) {
990 zlog_debug(
991 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
15569c58 992 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 993 }
994 break;
65740e1b 995 }
d62a17ae 996 return 0;
fb018d25
DS
997}
998
999/**
078430f6 1000 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
fb018d25
DS
1001 * command to Zebra.
1002 * ARGUMENTS:
1003 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
078430f6 1004 * int command -- command to send to zebra
fb018d25
DS
1005 * RETURNS:
1006 * void.
1007 */
d62a17ae 1008static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
fb018d25 1009{
3c192540 1010 bool exact_match = false;
ed6cec97 1011 bool resolve_via_default = false;
d62a17ae 1012 int ret;
1013
3c192540 1014 if (!zclient)
d62a17ae 1015 return;
1016
1017 /* Don't try to register if Zebra doesn't know of this instance. */
bb4ef1ae
DS
1018 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1019 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
1020 zlog_debug(
1021 "%s: No zebra instance to talk to, not installing NHT entry",
1022 __func__);
d62a17ae 1023 return;
bb4ef1ae 1024 }
d62a17ae 1025
1ee0a2df
DS
1026 if (!bgp_zebra_num_connects()) {
1027 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
1028 zlog_debug(
1029 "%s: We have not connected yet, cannot send nexthops",
1030 __func__);
1ee0a2df 1031 }
ed6cec97
DS
1032 if (command == ZEBRA_NEXTHOP_REGISTER) {
1033 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1034 exact_match = true;
1035 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1036 resolve_via_default = true;
1037 }
d62a17ae 1038
f663c581
RW
1039 if (BGP_DEBUG(zebra, ZEBRA))
1040 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1041 zserv_command_string(command), &bnc->prefix,
1042 bnc->bgp->name_pretty);
960035b2 1043
eb3c9d97
DL
1044 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1045 exact_match, resolve_via_default,
1046 bnc->bgp->vrf_id);
a6522038 1047 if (ret == ZCLIENT_SEND_FAILURE) {
e50f7cfd 1048 flog_warn(EC_BGP_ZEBRA_SEND,
f162a5b9 1049 "sendmsg_nexthop: zclient_send_message() failed");
a6522038 1050 return;
1051 }
d62a17ae 1052
3d174ce0 1053 if (command == ZEBRA_NEXTHOP_REGISTER)
d62a17ae 1054 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
3d174ce0 1055 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
d62a17ae 1056 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1057 return;
fb018d25
DS
1058}
1059
1060/**
078430f6
DS
1061 * register_zebra_rnh - register a NH/route with Zebra for notification
1062 * when the route or the route to the nexthop changes.
fb018d25 1063 * ARGUMENTS:
078430f6 1064 * struct bgp_nexthop_cache *bnc
fb018d25
DS
1065 * RETURNS:
1066 * void.
1067 */
23f60ffd 1068static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
fb018d25 1069{
d62a17ae 1070 /* Check if we have already registered */
1071 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1072 return;
8761cd6d
DS
1073
1074 if (bnc->ifindex) {
1075 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1076 return;
1077 }
1078
3d174ce0 1079 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
fb018d25
DS
1080}
1081
1082/**
078430f6 1083 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
fb018d25 1084 * ARGUMENTS:
078430f6 1085 * struct bgp_nexthop_cache *bnc
fb018d25
DS
1086 * RETURNS:
1087 * void.
1088 */
23f60ffd 1089static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
fb018d25 1090{
d62a17ae 1091 /* Check if we have already registered */
1092 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1093 return;
1094
8761cd6d
DS
1095 if (bnc->ifindex) {
1096 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1097 return;
1098 }
1099
3d174ce0 1100 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
fb018d25
DS
1101}
1102
1103/**
1104 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1105 * ARGUMENTS:
1106 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1107 * RETURNS:
1108 * void.
1109 */
021b6596 1110void evaluate_paths(struct bgp_nexthop_cache *bnc)
fb018d25 1111{
9bcb3eef 1112 struct bgp_dest *dest;
4b7e6066 1113 struct bgp_path_info *path;
ac2f64d3 1114 struct bgp_path_info *bpi_ultimate;
d62a17ae 1115 int afi;
1116 struct peer *peer = (struct peer *)bnc->nht_info;
1117 struct bgp_table *table;
1118 safi_t safi;
960035b2 1119 struct bgp *bgp_path;
b54892e0 1120 const struct prefix *p;
d62a17ae 1121
1122 if (BGP_DEBUG(nht, NHT)) {
987a720a 1123 char bnc_buf[BNC_FLAG_DUMP_SIZE];
df2a41a9 1124 char chg_buf[BNC_FLAG_DUMP_SIZE];
987a720a 1125
d62a17ae 1126 zlog_debug(
036f482f
DA
1127 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1128 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 1129 bnc->bgp->name_pretty,
df2a41a9
DS
1130 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1131 sizeof(bnc_buf)),
1132 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1133 sizeof(bnc_buf)));
fb018d25
DS
1134 }
1135
a2addae8 1136 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
d62a17ae 1137 if (!(path->type == ZEBRA_ROUTE_BGP
1138 && ((path->sub_type == BGP_ROUTE_NORMAL)
960035b2
PZ
1139 || (path->sub_type == BGP_ROUTE_STATIC)
1140 || (path->sub_type == BGP_ROUTE_IMPORTED))))
d62a17ae 1141 continue;
1142
9bcb3eef
DS
1143 dest = path->net;
1144 assert(dest && bgp_dest_table(dest));
1145 p = bgp_dest_get_prefix(dest);
b54892e0 1146 afi = family2afi(p->family);
9bcb3eef 1147 table = bgp_dest_table(dest);
d62a17ae 1148 safi = table->safi;
1149
960035b2
PZ
1150 /*
1151 * handle routes from other VRFs (they can have a
1152 * nexthop in THIS VRF). bgp_path is the bgp instance
1153 * that owns the route referencing this nexthop.
1154 */
1155 bgp_path = table->bgp;
1156
1157 /*
1158 * Path becomes valid/invalid depending on whether the nexthop
d62a17ae 1159 * reachable/unreachable.
960035b2
PZ
1160 *
1161 * In case of unicast routes that were imported from vpn
1162 * and that have labels, they are valid only if there are
1163 * nexthops with labels
a2299aba
AD
1164 *
1165 * If the nexthop is EVPN gateway-IP,
1166 * do not check for a valid label.
d62a17ae 1167 */
960035b2 1168
34ea39b6 1169 bool bnc_is_valid_nexthop = false;
1170 bool path_valid = false;
960035b2 1171
021b6596
AD
1172 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1173 && path->extra && path->extra->num_labels
1174 && (path->attr->evpn_overlay.type
1175 != OVERLAY_INDEX_GATEWAY_IP)) {
960035b2 1176 bnc_is_valid_nexthop =
1bb550b6
PG
1177 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1178 : false;
960035b2 1179 } else {
e7cbe5e5
NT
1180 if (bgp_update_martian_nexthop(
1181 bnc->bgp, afi, safi, path->type,
9bcb3eef 1182 path->sub_type, path->attr, dest)) {
e7cbe5e5
NT
1183 if (BGP_DEBUG(nht, NHT))
1184 zlog_debug(
56ca3b5b 1185 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
9bcb3eef 1186 __func__, dest, bgp_path->name);
e7cbe5e5
NT
1187 } else
1188 bnc_is_valid_nexthop =
34ea39b6 1189 bgp_isvalid_nexthop(bnc) ? true : false;
960035b2
PZ
1190 }
1191
9e15d76a 1192 if (BGP_DEBUG(nht, NHT)) {
4a8cd6ad
PG
1193
1194 if (dest->pdest) {
1195 char rd_buf[RD_ADDRSTRLEN];
1196
1197 prefix_rd2str(
c4f64ea9
DA
1198 (struct prefix_rd *)bgp_dest_get_prefix(
1199 dest->pdest),
4a8cd6ad
PG
1200 rd_buf, sizeof(rd_buf),
1201 bgp_get_asnotation(bnc->bgp));
1202 zlog_debug(
1203 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
1204 afi, safi, dest, rd_buf,
9e15d76a 1205 bgp_path->name_pretty, path->flags);
4a8cd6ad 1206 } else
9e15d76a 1207 zlog_debug(
56ca3b5b 1208 "... eval path %d/%d %pBD %s flags 0x%x",
9bcb3eef 1209 afi, safi, dest, bgp_path->name_pretty,
9e15d76a 1210 path->flags);
1211 }
d62a17ae 1212
0139efe0 1213 /* Skip paths marked for removal or as history. */
1214 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1215 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1216 continue;
1217
d62a17ae 1218 /* Copy the metric to the path. Will be used for bestpath
1219 * computation */
ac2f64d3 1220 bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
d62a17ae 1221 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
ac2f64d3 1222 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
18ee8310 1223 bnc->metric;
ac2f64d3
LS
1224 else if (bpi_ultimate->extra)
1225 bpi_ultimate->extra->igpmetric = 0;
d62a17ae 1226
1227 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
545aeef1
RW
1228 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1229 || path->attr->srte_color != 0)
1defdda8 1230 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
d62a17ae 1231
d4980edf 1232 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
34ea39b6 1233 if (path_valid != bnc_is_valid_nexthop) {
1234 if (path_valid) {
1235 /* No longer valid, clear flag; also for EVPN
1236 * routes, unimport from VRFs if needed.
1237 */
1238 bgp_aggregate_decrement(bgp_path, p, path, afi,
1239 safi);
9bcb3eef 1240 bgp_path_info_unset_flag(dest, path,
34ea39b6 1241 BGP_PATH_VALID);
1242 if (safi == SAFI_EVPN &&
9bcb3eef 1243 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 1244 bgp_evpn_unimport_route(bgp_path,
9bcb3eef 1245 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 1246 } else {
1247 /* Path becomes valid, set flag; also for EVPN
1248 * routes, import from VRFs if needed.
1249 */
9bcb3eef 1250 bgp_path_info_set_flag(dest, path,
34ea39b6 1251 BGP_PATH_VALID);
1252 bgp_aggregate_increment(bgp_path, p, path, afi,
1253 safi);
1254 if (safi == SAFI_EVPN &&
9bcb3eef 1255 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 1256 bgp_evpn_import_route(bgp_path,
9bcb3eef 1257 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 1258 }
7c312383
AD
1259 }
1260
9bcb3eef 1261 bgp_process(bgp_path, dest, afi, safi);
d62a17ae 1262 }
fc9a856f 1263
1e91f1d1
DS
1264 if (peer) {
1265 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1266
824065c4
DS
1267 if (valid_nexthops) {
1268 /*
1269 * Peering cannot occur across a blackhole nexthop
1270 */
e817f2cc 1271 if (bnc->nexthop_num == 1 && bnc->nexthop
824065c4
DS
1272 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1273 peer->last_reset = PEER_DOWN_WAITING_NHT;
1274 valid_nexthops = 0;
1275 } else
1276 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1277 } else
1e91f1d1
DS
1278 peer->last_reset = PEER_DOWN_WAITING_NHT;
1279
1280 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1281 if (BGP_DEBUG(nht, NHT))
15569c58 1282 zlog_debug(
8761cd6d 1283 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
15569c58 1284 __func__, peer->host,
8761cd6d
DS
1285 peer->bgp->name_pretty,
1286 !!valid_nexthops);
f8dcd38d 1287 bgp_fsm_nht_update(peer, !!valid_nexthops);
1e91f1d1
DS
1288 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1289 }
d62a17ae 1290 }
fc9a856f 1291
d62a17ae 1292 RESET_FLAG(bnc->change_flags);
fb018d25
DS
1293}
1294
1295/**
1296 * path_nh_map - make or break path-to-nexthop association.
1297 * ARGUMENTS:
1298 * path - pointer to the path structure
1299 * bnc - pointer to the nexthop structure
1300 * make - if set, make the association. if unset, just break the existing
1301 * association.
1302 */
7f040da1
DS
1303void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1304 bool make)
fb018d25 1305{
d62a17ae 1306 if (path->nexthop) {
1307 LIST_REMOVE(path, nh_thread);
1308 path->nexthop->path_count--;
1309 path->nexthop = NULL;
1310 }
1311 if (make) {
1312 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1313 path->nexthop = bnc;
1314 path->nexthop->path_count++;
1315 }
fb018d25 1316}
1ee0a2df
DS
1317
1318/*
1319 * This function is called to register nexthops to zebra
1320 * as that we may have tried to install the nexthops
1321 * before we actually have a zebra connection
1322 */
1323void bgp_nht_register_nexthops(struct bgp *bgp)
1324{
f663c581
RW
1325 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1326 struct bgp_nexthop_cache *bnc;
1ee0a2df 1327
f663c581
RW
1328 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1329 bnc) {
23f60ffd 1330 register_zebra_rnh(bnc);
1ee0a2df
DS
1331 }
1332 }
1333}
1ea03b90 1334
b3a3290e 1335void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1ea03b90
DS
1336{
1337 struct bgp *bgp;
1ea03b90
DS
1338 struct bgp_nexthop_cache *bnc;
1339 struct nexthop *nhop;
1340 struct interface *ifp;
1341 struct prefix p;
35aae5c9 1342 ifindex_t ifindex = 0;
1ea03b90
DS
1343
1344 if (peer->ifp)
1345 return;
1346
1347 bgp = peer->bgp;
1ea03b90 1348 if (!sockunion2hostprefix(&peer->su, &p)) {
b3a3290e
DS
1349 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1350 __func__, peer->host);
1ea03b90
DS
1351 return;
1352 }
1353
1354 if (p.family != AF_INET6)
1355 return;
35aae5c9
DS
1356 /*
1357 * Gather the ifindex for if up/down events to be
1358 * tagged into this fun
1359 */
1360 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1361 ifindex = peer->su.sin6.sin6_scope_id;
1ea03b90 1362
35aae5c9 1363 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1ea03b90
DS
1364 if (!bnc)
1365 return;
1366
1367 if (peer != bnc->nht_info)
1368 return;
1369
1370 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
8c9769e0
DS
1371 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1372
1373 if (!ifp)
1374 continue;
1375
1ea03b90
DS
1376 zclient_send_interface_radv_req(zclient,
1377 nhop->vrf_id,
1378 ifp, true,
1379 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1380 }
1381}
b3a3290e
DS
1382
1383void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1384{
1385 struct bgp *bgp;
b3a3290e
DS
1386 struct bgp_nexthop_cache *bnc;
1387 struct nexthop *nhop;
1388 struct interface *ifp;
1389 struct prefix p;
35aae5c9 1390 ifindex_t ifindex = 0;
b3a3290e
DS
1391
1392 if (peer->ifp)
1393 return;
1394
1395 bgp = peer->bgp;
1396
b3a3290e
DS
1397 if (!sockunion2hostprefix(&peer->su, &p)) {
1398 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1399 __func__, peer->host);
1400 return;
1401 }
1402
1403 if (p.family != AF_INET6)
1404 return;
35aae5c9
DS
1405 /*
1406 * Gather the ifindex for if up/down events to be
1407 * tagged into this fun
1408 */
1409 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1410 ifindex = peer->su.sin6.sin6_scope_id;
b3a3290e 1411
35aae5c9 1412 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
b3a3290e
DS
1413 if (!bnc)
1414 return;
1415
1416 if (peer != bnc->nht_info)
1417 return;
1418
1419 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1420 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1421
68cecc3b
DS
1422 if (!ifp)
1423 continue;
1424
b3a3290e
DS
1425 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1426 0);
1427 }
1428}
c589d847
AK
1429
1430/****************************************************************************
1431 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1432 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1433 * left to the application using it.
1434 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1435 * failover of remote ES links.
1436 ***************************************************************************/
1437static bitfield_t bgp_nh_id_bitmap;
8bcb09a1 1438static uint32_t bgp_l3nhg_start;
c589d847 1439
8bcb09a1
AK
/* XXX - currently we do nothing on the nexthop-group callbacks */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* Intentionally empty. */
}
f3c6dd49
DS
1444
/* Nexthop-group "modify" callback; currently a no-op. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	/* Intentionally empty. */
}
1448
8bcb09a1
AK
/* Nexthop-group "add nexthop" callback; currently a no-op. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* Intentionally empty. */
}
f3c6dd49 1453
8bcb09a1
AK
/* Nexthop-group "delete nexthop" callback; currently a no-op. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* Intentionally empty. */
}
f3c6dd49 1458
/* Nexthop-group "delete" callback; currently a no-op. */
static void bgp_l3nhg_del_cb(const char *name)
{
	/* Intentionally empty. */
}
1462
8bcb09a1 1463static void bgp_l3nhg_zebra_init(void)
c589d847 1464{
8bcb09a1
AK
1465 static bool bgp_l3nhg_zebra_inited;
1466 if (bgp_l3nhg_zebra_inited)
c589d847
AK
1467 return;
1468
8bcb09a1
AK
1469 bgp_l3nhg_zebra_inited = true;
1470 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
f3c6dd49
DS
1471 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1472 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1473 bgp_l3nhg_del_cb);
c589d847
AK
1474}
1475
8bcb09a1 1476
c589d847
AK
1477void bgp_l3nhg_init(void)
1478{
8bcb09a1
AK
1479 uint32_t id_max;
1480
7a8ce9d5 1481 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
8bcb09a1 1482 bf_init(bgp_nh_id_bitmap, id_max);
c589d847 1483 bf_assign_zero_index(bgp_nh_id_bitmap);
8bcb09a1
AK
1484
1485 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1486 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1487 bgp_l3nhg_start + id_max);
c589d847
AK
1488}
1489
1490void bgp_l3nhg_finish(void)
1491{
1492 bf_free(bgp_nh_id_bitmap);
1493}
8bcb09a1
AK
1494
1495uint32_t bgp_l3nhg_id_alloc(void)
1496{
1497 uint32_t nhg_id = 0;
1498
1499 bgp_l3nhg_zebra_init();
1500 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1501 if (nhg_id)
1502 nhg_id += bgp_l3nhg_start;
1503
1504 return nhg_id;
1505}
1506
1507void bgp_l3nhg_id_free(uint32_t nhg_id)
1508{
1509 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1510 return;
1511
1512 nhg_id -= bgp_l3nhg_start;
1513
1514 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1515}