]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_nht.c
Merge pull request #12603 from opensourcerouting/fix/deprecate_bgp_stuff_some
[mirror_frr.git] / bgpd / bgp_nht.c
CommitLineData
fb018d25
DS
1/* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
fb018d25
DS
19 */
20
21#include <zebra.h>
22
23#include "command.h"
24#include "thread.h"
25#include "prefix.h"
26#include "zclient.h"
27#include "stream.h"
28#include "network.h"
29#include "log.h"
30#include "memory.h"
31#include "nexthop.h"
7076bb2f 32#include "vrf.h"
039f3a34 33#include "filter.h"
8bcb09a1 34#include "nexthop_group.h"
fb018d25
DS
35
36#include "bgpd/bgpd.h"
37#include "bgpd/bgp_table.h"
38#include "bgpd/bgp_route.h"
39#include "bgpd/bgp_attr.h"
40#include "bgpd/bgp_nexthop.h"
41#include "bgpd/bgp_debug.h"
14454c9f 42#include "bgpd/bgp_errors.h"
fb018d25 43#include "bgpd/bgp_nht.h"
ffd0c037 44#include "bgpd/bgp_fsm.h"
afbb1c59 45#include "bgpd/bgp_zebra.h"
0378bcaa 46#include "bgpd/bgp_flowspec_util.h"
7c312383 47#include "bgpd/bgp_evpn.h"
9e15d76a 48#include "bgpd/bgp_rd.h"
fb018d25
DS
49
50extern struct zclient *zclient;
fb018d25 51
23f60ffd
DA
52static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
53static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
40381db7 54static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
cc9f21da 55static void bgp_nht_ifp_initial(struct thread *thread);
fb018d25 56
d62a17ae 57static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
d4d9d757 58{
d62a17ae 59 return (bgp_zebra_num_connects() == 0
c3b95419
EDP
60 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 && bnc->nexthop_num > 0));
d4d9d757
LB
62}
63
4cd690ae
PG
64static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
65 struct bgp_path_info *path)
66{
67 struct interface *ifp = NULL;
68 struct nexthop *nexthop;
69 struct bgp_interface *iifp;
70 struct peer *peer;
71
72 if (!path->extra || !path->extra->peer_orig)
73 return false;
74
75 peer = path->extra->peer_orig;
76
77 /* only connected ebgp peers are valid */
78 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
79 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
80 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
81 return false;
82
83 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
84 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
85 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
86 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
87 ifp = if_lookup_by_index(
88 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
89 bnc->bgp->vrf_id);
90 }
91 if (!ifp)
92 continue;
93 iifp = ifp->info;
94 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
95 return true;
96 }
97 return false;
98}
99
1bb550b6
PG
100static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
101 struct bgp_path_info *path)
102{
103 struct interface *ifp = NULL;
104 struct nexthop *nexthop;
105
106 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
107 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
108 ifp = if_lookup_by_index(
109 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
110 bnc->bgp->vrf_id);
111 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
112 ifp->ll_type == ZEBRA_LLT_IP6GRE))
113 break;
114 }
115 }
116 if (!ifp)
117 return false;
118
119 if (CHECK_FLAG(path->attr->rmap_change_flags,
120 BATTR_RMAP_L3VPN_ACCEPT_GRE))
121 return true;
122
123 return false;
124}
125
126static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
127 struct bgp_path_info *path)
960035b2 128{
7f8c7d91 129 /*
1bb550b6 130 * - In the case of MPLS-VPN, the label is learned from LDP or other
7f8c7d91
HS
131 * protocols, and nexthop tracking is enabled for the label.
132 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
1bb550b6 133 * - In the case of SRv6-VPN, we need to track the reachability to the
7f8c7d91
HS
134 * SID (in other words, IPv6 address). As in MPLS, we need to record
135 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
136 * currently not implemented, and this function assumes that all
137 * Transit routes for SRv6-VPN are valid.
1bb550b6 138 * - Otherwise check for mpls-gre acceptance
7f8c7d91 139 */
1bb550b6
PG
140 return (bgp_zebra_num_connects() == 0 ||
141 (bnc && (bnc->nexthop_num > 0 &&
46dbf9d0
DA
142 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
143 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
1bb550b6 144 bnc->bgp->srv6_enabled ||
4cd690ae 145 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
1bb550b6 146 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
960035b2
PZ
147}
148
d62a17ae 149static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
fb018d25 150{
d5c4bac9 151 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
036f482f
DA
152 if (BGP_DEBUG(nht, NHT))
153 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
154 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 155 bnc->bgp->name_pretty);
e37e1e27
PR
156 /* only unregister if this is the last nh for this prefix*/
157 if (!bnc_existing_for_prefix(bnc))
23f60ffd 158 unregister_zebra_rnh(bnc);
d62a17ae 159 bnc_free(bnc);
fb018d25 160 }
fb018d25
DS
161}
162
4b7e6066 163void bgp_unlink_nexthop(struct bgp_path_info *path)
f9164b1d 164{
d62a17ae 165 struct bgp_nexthop_cache *bnc = path->nexthop;
166
167 if (!bnc)
168 return;
f9164b1d 169
7f040da1 170 path_nh_map(path, NULL, false);
f9164b1d 171
d62a17ae 172 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
173}
174
996319e6
DS
175void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
176{
177 struct prefix pp;
178 struct prefix pt;
179 struct bgp_nexthop_cache *bncp, *bnct;
180 afi_t afi;
35aae5c9 181 ifindex_t ifindex = 0;
996319e6
DS
182
183 if (!sockunion2hostprefix(&from->su, &pp))
184 return;
185
35aae5c9
DS
186 /*
187 * Gather the ifindex for if up/down events to be
188 * tagged into this fun
189 */
190 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
191 ifindex = from->su.sin6.sin6_scope_id;
192
996319e6 193 afi = family2afi(pp.family);
35aae5c9 194 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
996319e6
DS
195
196 if (!sockunion2hostprefix(&to->su, &pt))
197 return;
198
35aae5c9
DS
199 /*
200 * Gather the ifindex for if up/down events to be
201 * tagged into this fun
202 */
203 ifindex = 0;
204 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
205 ifindex = to->su.sin6.sin6_scope_id;
206 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
996319e6
DS
207
208 if (bnct != bncp)
209 return;
210
211 if (bnct)
212 bnct->nht_info = to;
213}
214
51f3216b
PJD
215/*
216 * Returns the bnc whose bnc->nht_info matches the LL peer by
217 * looping through the IPv6 nexthop table
218 */
219static struct bgp_nexthop_cache *
220bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
221{
222 struct bgp_nexthop_cache *bnc;
223
224 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
225 bnc) {
226 if (bnc->nht_info == peer) {
227 if (BGP_DEBUG(nht, NHT)) {
228 zlog_debug(
229 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
230 &bnc->prefix, bnc->ifindex,
231 bnc->srte_color, bnc, peer->host,
232 peer->bgp->name_pretty, peer);
233 }
234 return bnc;
235 }
236 }
237
238 if (BGP_DEBUG(nht, NHT))
239 zlog_debug(
240 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
241 peer->host, peer->bgp->name_pretty, peer);
242
243 return NULL;
244}
245
d62a17ae 246void bgp_unlink_nexthop_by_peer(struct peer *peer)
f9164b1d 247{
d62a17ae 248 struct prefix p;
d62a17ae 249 struct bgp_nexthop_cache *bnc;
250 afi_t afi = family2afi(peer->su.sa.sa_family);
35aae5c9 251 ifindex_t ifindex = 0;
d62a17ae 252
51f3216b
PJD
253 if (!sockunion2hostprefix(&peer->su, &p)) {
254 /*
255 * In scenarios where unnumbered BGP session is brought
256 * down by shutting down the interface before unconfiguring
257 * the BGP neighbor, neighbor information in peer->su.sa
258 * will be cleared when the interface is shutdown. So
259 * during the deletion of unnumbered bgp peer, above check
260 * will return true. Therefore, in this case,BGP needs to
261 * find the bnc whose bnc->nht_info matches the
262 * peer being deleted and free it.
263 */
264 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
265 } else {
266 /*
267 * Gather the ifindex for if up/down events to be
268 * tagged into this fun
269 */
270 if (afi == AFI_IP6 &&
271 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
272 ifindex = peer->su.sin6.sin6_scope_id;
273 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
274 ifindex);
275 }
276
14315f2d 277 if (!bnc)
d62a17ae 278 return;
279
d62a17ae 280 /* cleanup the peer reference */
281 bnc->nht_info = NULL;
282
283 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
284}
285
960035b2
PZ
286/*
287 * A route and its nexthop might belong to different VRFs. Therefore,
288 * we need both the bgp_route and bgp_nexthop pointers.
289 */
290int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
4053e952 291 afi_t afi, safi_t safi, struct bgp_path_info *pi,
654a5978
PG
292 struct peer *peer, int connected,
293 const struct prefix *orig_prefix)
fb018d25 294{
f663c581 295 struct bgp_nexthop_cache_head *tree = NULL;
d62a17ae 296 struct bgp_nexthop_cache *bnc;
ac2f64d3 297 struct bgp_path_info *bpi_ultimate;
d62a17ae 298 struct prefix p;
545aeef1 299 uint32_t srte_color = 0;
d62a17ae 300 int is_bgp_static_route = 0;
8761cd6d 301 ifindex_t ifindex = 0;
d62a17ae 302
40381db7
DS
303 if (pi) {
304 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
305 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 306 ? 1
307 : 0;
308
309 /* Since Extended Next-hop Encoding (RFC5549) support, we want
310 to derive
311 address-family from the next-hop. */
312 if (!is_bgp_static_route)
7226bc40
TA
313 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
314 : AFI_IP;
d62a17ae 315
92d6f769
K
316 /* Validation for the ipv4 mapped ipv6 nexthop. */
317 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
318 afi = AFI_IP;
319 }
320
2951a7a4 321 /* This will return true if the global IPv6 NH is a link local
d62a17ae 322 * addr */
40381db7 323 if (make_prefix(afi, pi, &p) < 0)
d62a17ae 324 return 1;
545aeef1 325
654a5978
PG
326 if (!is_bgp_static_route && orig_prefix
327 && prefix_same(&p, orig_prefix)) {
328 if (BGP_DEBUG(nht, NHT)) {
329 zlog_debug(
330 "%s(%pFX): prefix loops through itself",
331 __func__, &p);
332 }
333 return 0;
334 }
335
545aeef1 336 srte_color = pi->attr->srte_color;
d62a17ae 337 } else if (peer) {
8761cd6d
DS
338 /*
339 * Gather the ifindex for if up/down events to be
340 * tagged into this fun
341 */
35aae5c9
DS
342 if (afi == AFI_IP6 &&
343 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
8761cd6d 344 ifindex = peer->su.sin6.sin6_scope_id;
35aae5c9
DS
345 if (ifindex == 0) {
346 if (BGP_DEBUG(nht, NHT)) {
347 zlog_debug(
348 "%s: Unable to locate ifindex, waiting till we have one",
349 peer->conf_if);
350 }
351 return 0;
352 }
353 }
8761cd6d 354
d62a17ae 355 if (!sockunion2hostprefix(&peer->su, &p)) {
356 if (BGP_DEBUG(nht, NHT)) {
357 zlog_debug(
358 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
15569c58 359 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 360 }
361 return 0;
362 }
363 } else
364 return 0;
365
366 if (is_bgp_static_route)
f663c581 367 tree = &bgp_nexthop->import_check_table[afi];
d62a17ae 368 else
f663c581 369 tree = &bgp_nexthop->nexthop_cache_table[afi];
d62a17ae 370
35aae5c9 371 bnc = bnc_find(tree, &p, srte_color, ifindex);
14315f2d 372 if (!bnc) {
35aae5c9 373 bnc = bnc_new(tree, &p, srte_color, ifindex);
960035b2 374 bnc->bgp = bgp_nexthop;
036f482f
DA
375 if (BGP_DEBUG(nht, NHT))
376 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
377 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 378 bnc->bgp->name_pretty, peer);
4115b296 379 } else {
036f482f 380 if (BGP_DEBUG(nht, NHT))
4115b296 381 zlog_debug(
036f482f
DA
382 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
383 &bnc->prefix, bnc->ifindex,
384 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
385 bnc->path_count, bnc->nht_info);
fc9a856f 386 }
d62a17ae 387
021b6596
AD
388 if (pi && is_route_parent_evpn(pi))
389 bnc->is_evpn_gwip_nexthop = true;
390
2bb8b49c 391 if (is_bgp_static_route) {
d62a17ae 392 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
393
394 /* If we're toggling the type, re-register */
892fedb6 395 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
d62a17ae 396 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
397 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
398 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
399 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
892fedb6
DA
400 } else if ((!CHECK_FLAG(bgp_route->flags,
401 BGP_FLAG_IMPORT_CHECK))
d62a17ae 402 && CHECK_FLAG(bnc->flags,
403 BGP_STATIC_ROUTE_EXACT_MATCH)) {
404 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
405 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
406 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
407 }
078430f6 408 }
d62a17ae 409 /* When nexthop is already known, but now requires 'connected'
410 * resolution,
411 * re-register it. The reverse scenario where the nexthop currently
412 * requires
413 * 'connected' resolution does not need a re-register (i.e., we treat
414 * 'connected-required' as an override) except in the scenario where
415 * this
416 * is actually a case of tracking a peer for connectivity (e.g., after
417 * disable connected-check).
418 * NOTE: We don't track the number of paths separately for 'connected-
419 * required' vs 'connected-not-required' as this change is not a common
420 * scenario.
421 */
422 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
423 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
424 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
425 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
2bb8b49c
DS
426 } else if (peer && !connected
427 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
d62a17ae 428 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
429 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
430 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
078430f6 431 }
4115b296 432 if (peer && (bnc->ifindex != ifindex)) {
433 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
434 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
435 bnc->ifindex = ifindex;
436 }
960035b2 437 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
1ee0a2df
DS
438 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
439 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
b54892e0 440 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
f663c581 441 && !is_default_host_route(&bnc->prefix))
23f60ffd 442 register_zebra_rnh(bnc);
1eb6c3ea 443
40381db7 444 if (pi && pi->nexthop != bnc) {
d62a17ae 445 /* Unlink from existing nexthop cache, if any. This will also
446 * free
447 * the nexthop cache entry, if appropriate.
448 */
40381db7 449 bgp_unlink_nexthop(pi);
d62a17ae 450
7f040da1
DS
451 /* updates NHT pi list reference */
452 path_nh_map(pi, bnc, true);
d62a17ae 453
ac2f64d3 454 bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
d62a17ae 455 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
ac2f64d3
LS
456 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
457 bnc->metric;
458 else if (bpi_ultimate->extra)
459 bpi_ultimate->extra->igpmetric = 0;
996319e6
DS
460 } else if (peer) {
461 /*
4667220e 462 * Let's not accidentally save the peer data for a peer
996319e6
DS
463 * we are going to throw away in a second or so.
464 * When we come back around we'll fix up this
465 * data properly in replace_nexthop_by_peer
466 */
467 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
468 bnc->nht_info = (void *)peer; /* NHT peer reference */
469 }
d62a17ae 470
471 /*
472 * We are cheating here. Views have no associated underlying
473 * ability to detect nexthops. So when we have a view
474 * just tell everyone the nexthop is valid
475 */
960035b2 476 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
d62a17ae 477 return 1;
1bb550b6
PG
478 else if (safi == SAFI_UNICAST && pi &&
479 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
480 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
481 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
482 else
d62a17ae 483 return (bgp_isvalid_nexthop(bnc));
fb018d25
DS
484}
485
d62a17ae 486void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
9a233a02 487{
d62a17ae 488 struct bgp_nexthop_cache *bnc;
489 struct prefix p;
35aae5c9 490 ifindex_t ifindex = 0;
d62a17ae 491
492 if (!peer)
493 return;
494
51f3216b
PJD
495 /*
496 * In case the below check evaluates true and if
497 * the bnc has not been freed at this point, then
498 * we might have to do something similar to what's
499 * done in bgp_unlink_nexthop_by_peer(). Since
500 * bgp_unlink_nexthop_by_peer() loops through the
501 * nodes of V6 nexthop cache to find the bnc, it is
502 * currently not being called here.
503 */
d62a17ae 504 if (!sockunion2hostprefix(&peer->su, &p))
505 return;
35aae5c9
DS
506 /*
507 * Gather the ifindex for if up/down events to be
508 * tagged into this fun
509 */
510 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
511 ifindex = peer->su.sin6.sin6_scope_id;
f663c581 512 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
35aae5c9 513 &p, 0, ifindex);
14315f2d
DS
514 if (!bnc) {
515 if (BGP_DEBUG(nht, NHT))
8c1a4c10 516 zlog_debug(
f663c581 517 "Cannot find connected NHT node for peer %s(%s)",
8c1a4c10 518 peer->host, peer->bgp->name_pretty);
14315f2d
DS
519 return;
520 }
d62a17ae 521
522 if (bnc->nht_info != peer) {
523 if (BGP_DEBUG(nht, NHT))
524 zlog_debug(
8c1a4c10
DS
525 "Connected NHT %p node for peer %s(%s) points to %p",
526 bnc, peer->host, bnc->bgp->name_pretty,
527 bnc->nht_info);
d62a17ae 528 return;
529 }
530
531 bnc->nht_info = NULL;
532
533 if (LIST_EMPTY(&(bnc->paths))) {
534 if (BGP_DEBUG(nht, NHT))
8c1a4c10
DS
535 zlog_debug(
536 "Freeing connected NHT node %p for peer %s(%s)",
537 bnc, peer->host, bnc->bgp->name_pretty);
23f60ffd 538 unregister_zebra_rnh(bnc);
d62a17ae 539 bnc_free(bnc);
540 }
9a233a02
DS
541}
542
545aeef1 543static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
9f002fa5
DS
544 struct zapi_route *nhr,
545 bool import_check)
fb018d25 546{
d62a17ae 547 struct nexthop *nexthop;
548 struct nexthop *oldnh;
549 struct nexthop *nhlist_head = NULL;
550 struct nexthop *nhlist_tail = NULL;
d62a17ae 551 int i;
021b6596 552 bool evpn_resolved = false;
14315f2d 553
083ec940 554 bnc->last_update = monotime(NULL);
d62a17ae 555 bnc->change_flags = 0;
d62a17ae 556
557 /* debug print the input */
987a720a
DS
558 if (BGP_DEBUG(nht, NHT)) {
559 char bnc_buf[BNC_FLAG_DUMP_SIZE];
560
d62a17ae 561 zlog_debug(
35aae5c9 562 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
2dbe669b 563 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
35aae5c9 564 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
987a720a
DS
565 nhr->nexthop_num, bnc->nexthop_num,
566 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
567 sizeof(bnc_buf)));
568 }
d62a17ae 569
545aeef1 570 if (nhr->metric != bnc->metric)
d62a17ae 571 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
572
545aeef1 573 if (nhr->nexthop_num != bnc->nexthop_num)
d62a17ae 574 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
575
9f002fa5
DS
576 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
577 !prefix_same(&bnc->prefix, &nhr->prefix))) {
578 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
579 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
580 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
581 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
582
583 bnc_nexthop_free(bnc);
584 bnc->nexthop = NULL;
585
586 if (BGP_DEBUG(nht, NHT))
587 zlog_debug(
588 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
589 __func__, &bnc->prefix, &nhr->prefix);
590 } else if (nhr->nexthop_num) {
6137a77d
DS
591 struct peer *peer = bnc->nht_info;
592
d62a17ae 593 /* notify bgp fsm if nbr ip goes from invalid->valid */
594 if (!bnc->nexthop_num)
595 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
596
021b6596
AD
597 if (!bnc->is_evpn_gwip_nexthop)
598 bnc->flags |= BGP_NEXTHOP_VALID;
545aeef1
RW
599 bnc->metric = nhr->metric;
600 bnc->nexthop_num = nhr->nexthop_num;
4a749e2c 601
960035b2
PZ
602 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
603
545aeef1 604 for (i = 0; i < nhr->nexthop_num; i++) {
960035b2
PZ
605 int num_labels = 0;
606
545aeef1 607 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
d62a17ae 608
6137a77d
DS
609 /*
610 * Turn on RA for the v6 nexthops
611 * we receive from bgp. This is to allow us
612 * to work with v4 routing over v6 nexthops
613 */
687a2b5d
DS
614 if (peer && !peer->ifp
615 && CHECK_FLAG(peer->flags,
616 PEER_FLAG_CAPABILITY_ENHE)
545aeef1 617 && nhr->prefix.family == AF_INET6
65f803e8 618 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
6137a77d
DS
619 struct interface *ifp;
620
621 ifp = if_lookup_by_index(nexthop->ifindex,
622 nexthop->vrf_id);
8c9769e0
DS
623 if (ifp)
624 zclient_send_interface_radv_req(
625 zclient, nexthop->vrf_id, ifp,
626 true,
627 BGP_UNNUM_DEFAULT_RA_INTERVAL);
6137a77d 628 }
960035b2
PZ
629 /* There is at least one label-switched path */
630 if (nexthop->nh_label &&
631 nexthop->nh_label->num_labels) {
632
633 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
634 num_labels = nexthop->nh_label->num_labels;
635 }
636
d62a17ae 637 if (BGP_DEBUG(nht, NHT)) {
638 char buf[NEXTHOP_STRLEN];
639 zlog_debug(
960035b2
PZ
640 " nhop via %s (%d labels)",
641 nexthop2str(nexthop, buf, sizeof(buf)),
642 num_labels);
d62a17ae 643 }
644
645 if (nhlist_tail) {
646 nhlist_tail->next = nexthop;
647 nhlist_tail = nexthop;
648 } else {
649 nhlist_tail = nexthop;
650 nhlist_head = nexthop;
651 }
652
653 /* No need to evaluate the nexthop if we have already
654 * determined
655 * that there has been a change.
656 */
657 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
658 continue;
659
660 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
78fba41b 661 if (nexthop_same(oldnh, nexthop))
d62a17ae 662 break;
663
664 if (!oldnh)
665 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
666 }
667 bnc_nexthop_free(bnc);
668 bnc->nexthop = nhlist_head;
021b6596
AD
669
670 /*
671 * Gateway IP nexthop is L3 reachable. Mark it as
672 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
673 * remote EVPN RT-2.
674 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
675 * When its mapping with EVPN RT-2 is established, unset
676 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
677 */
678 if (bnc->is_evpn_gwip_nexthop) {
679 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
680
511211bf 681 if (BGP_DEBUG(nht, NHT))
021b6596 682 zlog_debug(
511211bf
DA
683 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
684 &bnc->prefix,
021b6596
AD
685 (evpn_resolved ? "successful"
686 : "failed"));
021b6596
AD
687
688 if (evpn_resolved) {
689 bnc->flags |= BGP_NEXTHOP_VALID;
690 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
691 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
692 } else {
693 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
694 bnc->flags &= ~BGP_NEXTHOP_VALID;
695 }
696 }
d62a17ae 697 } else {
021b6596 698 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
d62a17ae 699 bnc->flags &= ~BGP_NEXTHOP_VALID;
c3b95419 700 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
545aeef1 701 bnc->nexthop_num = nhr->nexthop_num;
d62a17ae 702
703 /* notify bgp fsm if nbr ip goes from valid->invalid */
704 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
705
706 bnc_nexthop_free(bnc);
707 bnc->nexthop = NULL;
708 }
709
710 evaluate_paths(bnc);
fb018d25
DS
711}
712
8761cd6d
DS
713static void bgp_nht_ifp_table_handle(struct bgp *bgp,
714 struct bgp_nexthop_cache_head *table,
715 struct interface *ifp, bool up)
716{
717 struct bgp_nexthop_cache *bnc;
718
719 frr_each (bgp_nexthop_cache, table, bnc) {
720 if (bnc->ifindex != ifp->ifindex)
721 continue;
722
083ec940 723 bnc->last_update = monotime(NULL);
8761cd6d
DS
724 bnc->change_flags = 0;
725
474cfe4a
DS
726 /*
727 * For interface based routes ( ala the v6 LL routes
728 * that this was written for ) the metric received
729 * for the connected route is 0 not 1.
730 */
731 bnc->metric = 0;
8761cd6d
DS
732 if (up) {
733 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
734 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
8761cd6d
DS
735 bnc->nexthop_num = 1;
736 } else {
737 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
738 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
739 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
740 bnc->nexthop_num = 0;
8761cd6d
DS
741 }
742
743 evaluate_paths(bnc);
744 }
745}
746static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
747{
748 struct bgp *bgp;
749
096f7609 750 bgp = ifp->vrf->info;
8761cd6d
DS
751 if (!bgp)
752 return;
753
7f2e9cce
DS
754 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
755 up);
756 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
757 up);
8761cd6d
DS
758 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
759 up);
760 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
761 up);
762}
763
/* Run nexthop-tracking handling for @ifp with the "up" state. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool up = true;

	bgp_nht_ifp_handle(ifp, up);
}
768
/* Run nexthop-tracking handling for @ifp with the "down" state. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool up = false;

	bgp_nht_ifp_handle(ifp, up);
}
773
cc9f21da 774static void bgp_nht_ifp_initial(struct thread *thread)
8761cd6d
DS
775{
776 ifindex_t ifindex = THREAD_VAL(thread);
0b52b75a
IR
777 struct bgp *bgp = THREAD_ARG(thread);
778 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
8761cd6d
DS
779
780 if (!ifp)
cc9f21da 781 return;
8761cd6d 782
4115b296 783 if (BGP_DEBUG(nht, NHT))
784 zlog_debug(
785 "Handle NHT initial update for Intf %s(%d) status %s",
786 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
787
8761cd6d
DS
788 if (if_is_up(ifp))
789 bgp_nht_ifp_up(ifp);
790 else
791 bgp_nht_ifp_down(ifp);
8761cd6d
DS
792}
793
794/*
795 * So the bnc code has the ability to handle interface up/down
796 * events to properly handle v6 LL peering.
797 * What is happening here:
798 * The event system for peering expects the nht code to
799 * report on the tracking events after we move to active
800 * So let's give the system a chance to report on that event
801 * in a manner that is expected.
802 */
803void bgp_nht_interface_events(struct peer *peer)
804{
805 struct bgp *bgp = peer->bgp;
806 struct bgp_nexthop_cache_head *table;
807 struct bgp_nexthop_cache *bnc;
808 struct prefix p;
35aae5c9 809 ifindex_t ifindex = 0;
8761cd6d
DS
810
811 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
812 return;
813
814 if (!sockunion2hostprefix(&peer->su, &p))
815 return;
35aae5c9
DS
816 /*
817 * Gather the ifindex for if up/down events to be
818 * tagged into this fun
819 */
820 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
821 ifindex = peer->su.sin6.sin6_scope_id;
8761cd6d
DS
822
823 table = &bgp->nexthop_cache_table[AFI_IP6];
35aae5c9 824 bnc = bnc_find(table, &p, 0, ifindex);
8761cd6d
DS
825 if (!bnc)
826 return;
827
828 if (bnc->ifindex)
0b52b75a 829 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
8761cd6d
DS
830 bnc->ifindex, NULL);
831}
832
545aeef1
RW
833void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
834{
835 struct bgp_nexthop_cache_head *tree = NULL;
b8210849 836 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
545aeef1 837 struct bgp *bgp;
06e4e901 838 struct prefix match;
545aeef1
RW
839 struct zapi_route nhr;
840 afi_t afi;
841
842 bgp = bgp_lookup_by_vrf_id(vrf_id);
843 if (!bgp) {
844 flog_err(
845 EC_BGP_NH_UPD,
846 "parse nexthop update: instance not found for vrf_id %u",
847 vrf_id);
848 return;
849 }
850
06e4e901 851 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
cc42c4f0
DS
852 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
853 bgp->name_pretty);
545aeef1
RW
854 return;
855 }
856
06e4e901 857 afi = family2afi(match.family);
b8210849 858 tree = &bgp->nexthop_cache_table[afi];
545aeef1 859
35aae5c9 860 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
2bb8b49c
DS
861 if (!bnc_nhc) {
862 if (BGP_DEBUG(nht, NHT))
863 zlog_debug(
864 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
865 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
866 } else
9f002fa5 867 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
b8210849
DS
868
869 tree = &bgp->import_check_table[afi];
870
35aae5c9 871 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
2bb8b49c
DS
872 if (!bnc_import) {
873 if (BGP_DEBUG(nht, NHT))
874 zlog_debug(
875 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
876 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
877 } else
d00a5f6b 878 bgp_process_nexthop_update(bnc_import, &nhr, true);
1e24860b 879
545aeef1
RW
880 /*
881 * HACK: if any BGP route is dependant on an SR-policy that doesn't
882 * exist, zebra will never send NH updates relative to that policy. In
883 * that case, whenever we receive an update about a colorless NH, update
884 * the corresponding colorful NHs that share the same endpoint but that
885 * are inactive. This ugly hack should work around the problem at the
886 * cost of a performance pernalty. Long term, what should be done is to
887 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
888 * which should provide a better infrastructure to solve this issue in
889 * a more efficient and elegant way.
890 */
b8210849 891 if (nhr.srte_color == 0 && bnc_nhc) {
545aeef1
RW
892 struct bgp_nexthop_cache *bnc_iter;
893
894 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
895 bnc_iter) {
d00a5f6b
DS
896 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
897 bnc_iter->srte_color == 0 ||
898 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
545aeef1
RW
899 continue;
900
9f002fa5 901 bgp_process_nexthop_update(bnc_iter, &nhr, false);
545aeef1
RW
902 }
903 }
904}
905
ee7ca6c0 906/*
907 * Cleanup nexthop registration and status information for BGP nexthops
908 * pertaining to this VRF. This is invoked upon VRF deletion.
909 */
910void bgp_cleanup_nexthops(struct bgp *bgp)
911{
f663c581
RW
912 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
913 struct bgp_nexthop_cache *bnc;
ee7ca6c0 914
f663c581
RW
915 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
916 bnc) {
ee7ca6c0 917 /* Clear relevant flags. */
918 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
919 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
920 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
021b6596 921 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
ee7ca6c0 922 }
923 }
924}
925
fb018d25
DS
/**
 * make_prefix - make a prefix structure from the path (essentially
 * the path's node).
 */
40381db7 930static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
fb018d25 931{
078430f6 932
40381db7
DS
933 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
934 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 935 ? 1
936 : 0;
9bcb3eef
DS
937 struct bgp_dest *net = pi->net;
938 const struct prefix *p_orig = bgp_dest_get_prefix(net);
92d6f769 939 struct in_addr ipv4;
0378bcaa
PG
940
941 if (p_orig->family == AF_FLOWSPEC) {
942 if (!pi->peer)
943 return -1;
944 return bgp_flowspec_get_first_nh(pi->peer->bgp,
1840384b 945 pi, p, afi);
0378bcaa 946 }
d62a17ae 947 memset(p, 0, sizeof(struct prefix));
948 switch (afi) {
949 case AFI_IP:
950 p->family = AF_INET;
951 if (is_bgp_static) {
b54892e0
DS
952 p->u.prefix4 = p_orig->u.prefix4;
953 p->prefixlen = p_orig->prefixlen;
d62a17ae 954 } else {
92d6f769
K
955 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
956 ipv4_mapped_ipv6_to_ipv4(
957 &pi->attr->mp_nexthop_global, &ipv4);
958 p->u.prefix4 = ipv4;
959 p->prefixlen = IPV4_MAX_BITLEN;
960 } else {
7226bc40
TA
961 if (p_orig->family == AF_EVPN)
962 p->u.prefix4 =
963 pi->attr->mp_nexthop_global_in;
964 else
965 p->u.prefix4 = pi->attr->nexthop;
92d6f769
K
966 p->prefixlen = IPV4_MAX_BITLEN;
967 }
d62a17ae 968 }
969 break;
970 case AFI_IP6:
d62a17ae 971 p->family = AF_INET6;
972
973 if (is_bgp_static) {
b54892e0
DS
974 p->u.prefix6 = p_orig->u.prefix6;
975 p->prefixlen = p_orig->prefixlen;
d62a17ae 976 } else {
606fdbb1
DA
977 /* If we receive MP_REACH nexthop with ::(LL)
978 * or LL(LL), use LL address as nexthop cache.
979 */
980 if (pi->attr->mp_nexthop_len
981 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
982 && (IN6_IS_ADDR_UNSPECIFIED(
983 &pi->attr->mp_nexthop_global)
984 || IN6_IS_ADDR_LINKLOCAL(
985 &pi->attr->mp_nexthop_global)))
986 p->u.prefix6 = pi->attr->mp_nexthop_local;
17ef5a93
PG
987 /* If we receive MR_REACH with (GA)::(LL)
988 * then check for route-map to choose GA or LL
989 */
990 else if (pi->attr->mp_nexthop_len
991 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
2bb8b49c 992 if (pi->attr->mp_nexthop_prefer_global)
17ef5a93
PG
993 p->u.prefix6 =
994 pi->attr->mp_nexthop_global;
995 else
996 p->u.prefix6 =
997 pi->attr->mp_nexthop_local;
998 } else
606fdbb1 999 p->u.prefix6 = pi->attr->mp_nexthop_global;
d62a17ae 1000 p->prefixlen = IPV6_MAX_BITLEN;
1001 }
1002 break;
1003 default:
1004 if (BGP_DEBUG(nht, NHT)) {
1005 zlog_debug(
1006 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
15569c58 1007 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 1008 }
1009 break;
65740e1b 1010 }
d62a17ae 1011 return 0;
fb018d25
DS
1012}
1013
1014/**
078430f6 1015 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
fb018d25
DS
1016 * command to Zebra.
1017 * ARGUMENTS:
1018 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
078430f6 1019 * int command -- command to send to zebra
fb018d25
DS
1020 * RETURNS:
1021 * void.
1022 */
d62a17ae 1023static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
fb018d25 1024{
3c192540 1025 bool exact_match = false;
ed6cec97 1026 bool resolve_via_default = false;
d62a17ae 1027 int ret;
1028
3c192540 1029 if (!zclient)
d62a17ae 1030 return;
1031
1032 /* Don't try to register if Zebra doesn't know of this instance. */
bb4ef1ae
DS
1033 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1034 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
1035 zlog_debug(
1036 "%s: No zebra instance to talk to, not installing NHT entry",
1037 __func__);
d62a17ae 1038 return;
bb4ef1ae 1039 }
d62a17ae 1040
1ee0a2df
DS
1041 if (!bgp_zebra_num_connects()) {
1042 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
1043 zlog_debug(
1044 "%s: We have not connected yet, cannot send nexthops",
1045 __func__);
1ee0a2df 1046 }
ed6cec97
DS
1047 if (command == ZEBRA_NEXTHOP_REGISTER) {
1048 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1049 exact_match = true;
1050 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1051 resolve_via_default = true;
1052 }
d62a17ae 1053
f663c581
RW
1054 if (BGP_DEBUG(zebra, ZEBRA))
1055 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1056 zserv_command_string(command), &bnc->prefix,
1057 bnc->bgp->name_pretty);
960035b2 1058
eb3c9d97
DL
1059 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1060 exact_match, resolve_via_default,
1061 bnc->bgp->vrf_id);
a6522038 1062 if (ret == ZCLIENT_SEND_FAILURE) {
e50f7cfd 1063 flog_warn(EC_BGP_ZEBRA_SEND,
f162a5b9 1064 "sendmsg_nexthop: zclient_send_message() failed");
a6522038 1065 return;
1066 }
d62a17ae 1067
3d174ce0 1068 if (command == ZEBRA_NEXTHOP_REGISTER)
d62a17ae 1069 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
3d174ce0 1070 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
d62a17ae 1071 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1072 return;
fb018d25
DS
1073}
1074
1075/**
078430f6
DS
1076 * register_zebra_rnh - register a NH/route with Zebra for notification
1077 * when the route or the route to the nexthop changes.
fb018d25 1078 * ARGUMENTS:
078430f6 1079 * struct bgp_nexthop_cache *bnc
fb018d25
DS
1080 * RETURNS:
1081 * void.
1082 */
23f60ffd 1083static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
fb018d25 1084{
d62a17ae 1085 /* Check if we have already registered */
1086 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1087 return;
8761cd6d
DS
1088
1089 if (bnc->ifindex) {
1090 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1091 return;
1092 }
1093
3d174ce0 1094 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
fb018d25
DS
1095}
1096
1097/**
078430f6 1098 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
fb018d25 1099 * ARGUMENTS:
078430f6 1100 * struct bgp_nexthop_cache *bnc
fb018d25
DS
1101 * RETURNS:
1102 * void.
1103 */
23f60ffd 1104static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
fb018d25 1105{
d62a17ae 1106 /* Check if we have already registered */
1107 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1108 return;
1109
8761cd6d
DS
1110 if (bnc->ifindex) {
1111 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1112 return;
1113 }
1114
3d174ce0 1115 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
fb018d25
DS
1116}
1117
1118/**
1119 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1120 * ARGUMENTS:
1121 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1122 * RETURNS:
1123 * void.
1124 */
021b6596 1125void evaluate_paths(struct bgp_nexthop_cache *bnc)
fb018d25 1126{
9bcb3eef 1127 struct bgp_dest *dest;
4b7e6066 1128 struct bgp_path_info *path;
ac2f64d3 1129 struct bgp_path_info *bpi_ultimate;
d62a17ae 1130 int afi;
1131 struct peer *peer = (struct peer *)bnc->nht_info;
1132 struct bgp_table *table;
1133 safi_t safi;
960035b2 1134 struct bgp *bgp_path;
b54892e0 1135 const struct prefix *p;
d62a17ae 1136
1137 if (BGP_DEBUG(nht, NHT)) {
987a720a 1138 char bnc_buf[BNC_FLAG_DUMP_SIZE];
df2a41a9 1139 char chg_buf[BNC_FLAG_DUMP_SIZE];
987a720a 1140
d62a17ae 1141 zlog_debug(
036f482f
DA
1142 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1143 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 1144 bnc->bgp->name_pretty,
df2a41a9
DS
1145 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1146 sizeof(bnc_buf)),
1147 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1148 sizeof(bnc_buf)));
fb018d25
DS
1149 }
1150
a2addae8 1151 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
d62a17ae 1152 if (!(path->type == ZEBRA_ROUTE_BGP
1153 && ((path->sub_type == BGP_ROUTE_NORMAL)
960035b2
PZ
1154 || (path->sub_type == BGP_ROUTE_STATIC)
1155 || (path->sub_type == BGP_ROUTE_IMPORTED))))
d62a17ae 1156 continue;
1157
9bcb3eef
DS
1158 dest = path->net;
1159 assert(dest && bgp_dest_table(dest));
1160 p = bgp_dest_get_prefix(dest);
b54892e0 1161 afi = family2afi(p->family);
9bcb3eef 1162 table = bgp_dest_table(dest);
d62a17ae 1163 safi = table->safi;
1164
960035b2
PZ
1165 /*
1166 * handle routes from other VRFs (they can have a
1167 * nexthop in THIS VRF). bgp_path is the bgp instance
1168 * that owns the route referencing this nexthop.
1169 */
1170 bgp_path = table->bgp;
1171
1172 /*
1173 * Path becomes valid/invalid depending on whether the nexthop
d62a17ae 1174 * reachable/unreachable.
960035b2
PZ
1175 *
1176 * In case of unicast routes that were imported from vpn
1177 * and that have labels, they are valid only if there are
1178 * nexthops with labels
a2299aba
AD
1179 *
1180 * If the nexthop is EVPN gateway-IP,
1181 * do not check for a valid label.
d62a17ae 1182 */
960035b2 1183
34ea39b6 1184 bool bnc_is_valid_nexthop = false;
1185 bool path_valid = false;
960035b2 1186
021b6596
AD
1187 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1188 && path->extra && path->extra->num_labels
1189 && (path->attr->evpn_overlay.type
1190 != OVERLAY_INDEX_GATEWAY_IP)) {
960035b2 1191 bnc_is_valid_nexthop =
1bb550b6
PG
1192 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1193 : false;
960035b2 1194 } else {
e7cbe5e5
NT
1195 if (bgp_update_martian_nexthop(
1196 bnc->bgp, afi, safi, path->type,
9bcb3eef 1197 path->sub_type, path->attr, dest)) {
e7cbe5e5
NT
1198 if (BGP_DEBUG(nht, NHT))
1199 zlog_debug(
56ca3b5b 1200 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
9bcb3eef 1201 __func__, dest, bgp_path->name);
e7cbe5e5
NT
1202 } else
1203 bnc_is_valid_nexthop =
34ea39b6 1204 bgp_isvalid_nexthop(bnc) ? true : false;
960035b2
PZ
1205 }
1206
9e15d76a 1207 if (BGP_DEBUG(nht, NHT)) {
c4f64ea9 1208 if (dest->pdest)
9e15d76a 1209 zlog_debug(
c4f64ea9
DA
1210 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1211 afi, safi, dest,
1212 (struct prefix_rd *)bgp_dest_get_prefix(
1213 dest->pdest),
9e15d76a 1214 bgp_path->name_pretty, path->flags);
c4f64ea9 1215 else
9e15d76a 1216 zlog_debug(
56ca3b5b 1217 "... eval path %d/%d %pBD %s flags 0x%x",
9bcb3eef 1218 afi, safi, dest, bgp_path->name_pretty,
9e15d76a 1219 path->flags);
1220 }
d62a17ae 1221
0139efe0 1222 /* Skip paths marked for removal or as history. */
1223 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1224 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1225 continue;
1226
d62a17ae 1227 /* Copy the metric to the path. Will be used for bestpath
1228 * computation */
ac2f64d3 1229 bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
d62a17ae 1230 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
ac2f64d3 1231 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
18ee8310 1232 bnc->metric;
ac2f64d3
LS
1233 else if (bpi_ultimate->extra)
1234 bpi_ultimate->extra->igpmetric = 0;
d62a17ae 1235
1236 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
545aeef1
RW
1237 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1238 || path->attr->srte_color != 0)
1defdda8 1239 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
d62a17ae 1240
d4980edf 1241 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
34ea39b6 1242 if (path_valid != bnc_is_valid_nexthop) {
1243 if (path_valid) {
1244 /* No longer valid, clear flag; also for EVPN
1245 * routes, unimport from VRFs if needed.
1246 */
1247 bgp_aggregate_decrement(bgp_path, p, path, afi,
1248 safi);
9bcb3eef 1249 bgp_path_info_unset_flag(dest, path,
34ea39b6 1250 BGP_PATH_VALID);
1251 if (safi == SAFI_EVPN &&
9bcb3eef 1252 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 1253 bgp_evpn_unimport_route(bgp_path,
9bcb3eef 1254 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 1255 } else {
1256 /* Path becomes valid, set flag; also for EVPN
1257 * routes, import from VRFs if needed.
1258 */
9bcb3eef 1259 bgp_path_info_set_flag(dest, path,
34ea39b6 1260 BGP_PATH_VALID);
1261 bgp_aggregate_increment(bgp_path, p, path, afi,
1262 safi);
1263 if (safi == SAFI_EVPN &&
9bcb3eef 1264 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 1265 bgp_evpn_import_route(bgp_path,
9bcb3eef 1266 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 1267 }
7c312383
AD
1268 }
1269
9bcb3eef 1270 bgp_process(bgp_path, dest, afi, safi);
d62a17ae 1271 }
fc9a856f 1272
1e91f1d1
DS
1273 if (peer) {
1274 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1275
824065c4
DS
1276 if (valid_nexthops) {
1277 /*
1278 * Peering cannot occur across a blackhole nexthop
1279 */
e817f2cc 1280 if (bnc->nexthop_num == 1 && bnc->nexthop
824065c4
DS
1281 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1282 peer->last_reset = PEER_DOWN_WAITING_NHT;
1283 valid_nexthops = 0;
1284 } else
1285 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1286 } else
1e91f1d1
DS
1287 peer->last_reset = PEER_DOWN_WAITING_NHT;
1288
1289 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1290 if (BGP_DEBUG(nht, NHT))
15569c58 1291 zlog_debug(
8761cd6d 1292 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
15569c58 1293 __func__, peer->host,
8761cd6d
DS
1294 peer->bgp->name_pretty,
1295 !!valid_nexthops);
f8dcd38d 1296 bgp_fsm_nht_update(peer, !!valid_nexthops);
1e91f1d1
DS
1297 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1298 }
d62a17ae 1299 }
fc9a856f 1300
d62a17ae 1301 RESET_FLAG(bnc->change_flags);
fb018d25
DS
1302}
1303
1304/**
1305 * path_nh_map - make or break path-to-nexthop association.
1306 * ARGUMENTS:
1307 * path - pointer to the path structure
1308 * bnc - pointer to the nexthop structure
1309 * make - if set, make the association. if unset, just break the existing
1310 * association.
1311 */
7f040da1
DS
1312void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1313 bool make)
fb018d25 1314{
d62a17ae 1315 if (path->nexthop) {
1316 LIST_REMOVE(path, nh_thread);
1317 path->nexthop->path_count--;
1318 path->nexthop = NULL;
1319 }
1320 if (make) {
1321 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1322 path->nexthop = bnc;
1323 path->nexthop->path_count++;
1324 }
fb018d25 1325}
1ee0a2df
DS
1326
1327/*
1328 * This function is called to register nexthops to zebra
1329 * as that we may have tried to install the nexthops
1330 * before we actually have a zebra connection
1331 */
1332void bgp_nht_register_nexthops(struct bgp *bgp)
1333{
f663c581
RW
1334 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1335 struct bgp_nexthop_cache *bnc;
1ee0a2df 1336
f663c581
RW
1337 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1338 bnc) {
23f60ffd 1339 register_zebra_rnh(bnc);
1ee0a2df
DS
1340 }
1341 }
1342}
1ea03b90 1343
b3a3290e 1344void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1ea03b90
DS
1345{
1346 struct bgp *bgp;
1ea03b90
DS
1347 struct bgp_nexthop_cache *bnc;
1348 struct nexthop *nhop;
1349 struct interface *ifp;
1350 struct prefix p;
35aae5c9 1351 ifindex_t ifindex = 0;
1ea03b90
DS
1352
1353 if (peer->ifp)
1354 return;
1355
1356 bgp = peer->bgp;
1ea03b90 1357 if (!sockunion2hostprefix(&peer->su, &p)) {
b3a3290e
DS
1358 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1359 __func__, peer->host);
1ea03b90
DS
1360 return;
1361 }
1362
1363 if (p.family != AF_INET6)
1364 return;
35aae5c9
DS
1365 /*
1366 * Gather the ifindex for if up/down events to be
1367 * tagged into this fun
1368 */
1369 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1370 ifindex = peer->su.sin6.sin6_scope_id;
1ea03b90 1371
35aae5c9 1372 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1ea03b90
DS
1373 if (!bnc)
1374 return;
1375
1376 if (peer != bnc->nht_info)
1377 return;
1378
1379 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
8c9769e0
DS
1380 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1381
1382 if (!ifp)
1383 continue;
1384
1ea03b90
DS
1385 zclient_send_interface_radv_req(zclient,
1386 nhop->vrf_id,
1387 ifp, true,
1388 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1389 }
1390}
b3a3290e
DS
1391
1392void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1393{
1394 struct bgp *bgp;
b3a3290e
DS
1395 struct bgp_nexthop_cache *bnc;
1396 struct nexthop *nhop;
1397 struct interface *ifp;
1398 struct prefix p;
35aae5c9 1399 ifindex_t ifindex = 0;
b3a3290e
DS
1400
1401 if (peer->ifp)
1402 return;
1403
1404 bgp = peer->bgp;
1405
b3a3290e
DS
1406 if (!sockunion2hostprefix(&peer->su, &p)) {
1407 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1408 __func__, peer->host);
1409 return;
1410 }
1411
1412 if (p.family != AF_INET6)
1413 return;
35aae5c9
DS
1414 /*
1415 * Gather the ifindex for if up/down events to be
1416 * tagged into this fun
1417 */
1418 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1419 ifindex = peer->su.sin6.sin6_scope_id;
b3a3290e 1420
35aae5c9 1421 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
b3a3290e
DS
1422 if (!bnc)
1423 return;
1424
1425 if (peer != bnc->nht_info)
1426 return;
1427
1428 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1429 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1430
68cecc3b
DS
1431 if (!ifp)
1432 continue;
1433
b3a3290e
DS
1434 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1435 0);
1436 }
1437}
c589d847
AK
1438
1439/****************************************************************************
1440 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1441 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1442 * left to the application using it.
1443 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1444 * failover of remote ES links.
1445 ***************************************************************************/
/* Bitmap of ids handed out; indexes are relative to bgp_l3nhg_start. */
1446static bitfield_t bgp_nh_id_bitmap;
/* Base added to a bitmap index to form the nhg id; set lazily from zebra. */
8bcb09a1 1447static uint32_t bgp_l3nhg_start;
c589d847 1448
8bcb09a1
AK
/*
 * Nexthop-group callbacks handed to nexthop_group_init().
 * XXX - currently we do nothing on the callbacks
 */

/* A nexthop group was added: nothing to do. */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* intentionally empty */
}

/* A nexthop group was modified: nothing to do. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	/* intentionally empty */
}

/* A nexthop was added to a group: nothing to do. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

/* A nexthop was removed from a group: nothing to do. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

/* A nexthop group was deleted: nothing to do. */
static void bgp_l3nhg_del_cb(const char *name)
{
	/* intentionally empty */
}
1471
8bcb09a1 1472static void bgp_l3nhg_zebra_init(void)
c589d847 1473{
8bcb09a1
AK
1474 static bool bgp_l3nhg_zebra_inited;
1475 if (bgp_l3nhg_zebra_inited)
c589d847
AK
1476 return;
1477
8bcb09a1
AK
1478 bgp_l3nhg_zebra_inited = true;
1479 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
f3c6dd49
DS
1480 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1481 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1482 bgp_l3nhg_del_cb);
c589d847
AK
1483}
1484
8bcb09a1 1485
c589d847
AK
1486void bgp_l3nhg_init(void)
1487{
8bcb09a1
AK
1488 uint32_t id_max;
1489
7a8ce9d5 1490 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
8bcb09a1 1491 bf_init(bgp_nh_id_bitmap, id_max);
c589d847 1492 bf_assign_zero_index(bgp_nh_id_bitmap);
8bcb09a1
AK
1493
1494 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1495 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1496 bgp_l3nhg_start + id_max);
c589d847
AK
1497}
1498
1499void bgp_l3nhg_finish(void)
1500{
1501 bf_free(bgp_nh_id_bitmap);
1502}
8bcb09a1
AK
1503
1504uint32_t bgp_l3nhg_id_alloc(void)
1505{
1506 uint32_t nhg_id = 0;
1507
1508 bgp_l3nhg_zebra_init();
1509 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1510 if (nhg_id)
1511 nhg_id += bgp_l3nhg_start;
1512
1513 return nhg_id;
1514}
1515
1516void bgp_l3nhg_id_free(uint32_t nhg_id)
1517{
1518 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1519 return;
1520
1521 nhg_id -= bgp_l3nhg_start;
1522
1523 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1524}