]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_nht.c
topotests: add bgp vpnv4 over gre test
[mirror_frr.git] / bgpd / bgp_nht.c
CommitLineData
fb018d25
DS
1/* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
fb018d25
DS
19 */
20
21#include <zebra.h>
22
23#include "command.h"
24#include "thread.h"
25#include "prefix.h"
26#include "zclient.h"
27#include "stream.h"
28#include "network.h"
29#include "log.h"
30#include "memory.h"
31#include "nexthop.h"
7076bb2f 32#include "vrf.h"
039f3a34 33#include "filter.h"
8bcb09a1 34#include "nexthop_group.h"
fb018d25
DS
35
36#include "bgpd/bgpd.h"
37#include "bgpd/bgp_table.h"
38#include "bgpd/bgp_route.h"
39#include "bgpd/bgp_attr.h"
40#include "bgpd/bgp_nexthop.h"
41#include "bgpd/bgp_debug.h"
14454c9f 42#include "bgpd/bgp_errors.h"
fb018d25 43#include "bgpd/bgp_nht.h"
ffd0c037 44#include "bgpd/bgp_fsm.h"
afbb1c59 45#include "bgpd/bgp_zebra.h"
0378bcaa 46#include "bgpd/bgp_flowspec_util.h"
7c312383 47#include "bgpd/bgp_evpn.h"
9e15d76a 48#include "bgpd/bgp_rd.h"
fb018d25
DS
49
50extern struct zclient *zclient;
fb018d25 51
23f60ffd
DA
52static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
53static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
40381db7 54static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
cc9f21da 55static void bgp_nht_ifp_initial(struct thread *thread);
fb018d25 56
d62a17ae 57static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
d4d9d757 58{
d62a17ae 59 return (bgp_zebra_num_connects() == 0
c3b95419
EDP
60 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 && bnc->nexthop_num > 0));
d4d9d757
LB
62}
63
1bb550b6
PG
64static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
65 struct bgp_path_info *path)
66{
67 struct interface *ifp = NULL;
68 struct nexthop *nexthop;
69
70 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
71 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
72 ifp = if_lookup_by_index(
73 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
74 bnc->bgp->vrf_id);
75 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
76 ifp->ll_type == ZEBRA_LLT_IP6GRE))
77 break;
78 }
79 }
80 if (!ifp)
81 return false;
82
83 if (CHECK_FLAG(path->attr->rmap_change_flags,
84 BATTR_RMAP_L3VPN_ACCEPT_GRE))
85 return true;
86
87 return false;
88}
89
90static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
91 struct bgp_path_info *path)
960035b2 92{
7f8c7d91 93 /*
1bb550b6 94 * - In the case of MPLS-VPN, the label is learned from LDP or other
7f8c7d91
HS
95 * protocols, and nexthop tracking is enabled for the label.
96 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
1bb550b6 97 * - In the case of SRv6-VPN, we need to track the reachability to the
7f8c7d91
HS
98 * SID (in other words, IPv6 address). As in MPLS, we need to record
99 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
100 * currently not implemented, and this function assumes that all
101 * Transit routes for SRv6-VPN are valid.
1bb550b6 102 * - Otherwise check for mpls-gre acceptance
7f8c7d91 103 */
1bb550b6
PG
104 return (bgp_zebra_num_connects() == 0 ||
105 (bnc && (bnc->nexthop_num > 0 &&
106 (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
107 bnc->bgp->srv6_enabled ||
108 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
960035b2
PZ
109}
110
d62a17ae 111static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
fb018d25 112{
d5c4bac9 113 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
036f482f
DA
114 if (BGP_DEBUG(nht, NHT))
115 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
116 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 117 bnc->bgp->name_pretty);
e37e1e27
PR
118 /* only unregister if this is the last nh for this prefix*/
119 if (!bnc_existing_for_prefix(bnc))
23f60ffd 120 unregister_zebra_rnh(bnc);
d62a17ae 121 bnc_free(bnc);
fb018d25 122 }
fb018d25
DS
123}
124
4b7e6066 125void bgp_unlink_nexthop(struct bgp_path_info *path)
f9164b1d 126{
d62a17ae 127 struct bgp_nexthop_cache *bnc = path->nexthop;
128
129 if (!bnc)
130 return;
f9164b1d 131
7f040da1 132 path_nh_map(path, NULL, false);
f9164b1d 133
d62a17ae 134 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
135}
136
996319e6
DS
137void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
138{
139 struct prefix pp;
140 struct prefix pt;
141 struct bgp_nexthop_cache *bncp, *bnct;
142 afi_t afi;
35aae5c9 143 ifindex_t ifindex = 0;
996319e6
DS
144
145 if (!sockunion2hostprefix(&from->su, &pp))
146 return;
147
35aae5c9
DS
148 /*
149 * Gather the ifindex for if up/down events to be
150 * tagged into this fun
151 */
152 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
153 ifindex = from->su.sin6.sin6_scope_id;
154
996319e6 155 afi = family2afi(pp.family);
35aae5c9 156 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
996319e6
DS
157
158 if (!sockunion2hostprefix(&to->su, &pt))
159 return;
160
35aae5c9
DS
161 /*
162 * Gather the ifindex for if up/down events to be
163 * tagged into this fun
164 */
165 ifindex = 0;
166 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
167 ifindex = to->su.sin6.sin6_scope_id;
168 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
996319e6
DS
169
170 if (bnct != bncp)
171 return;
172
173 if (bnct)
174 bnct->nht_info = to;
175}
176
d62a17ae 177void bgp_unlink_nexthop_by_peer(struct peer *peer)
f9164b1d 178{
d62a17ae 179 struct prefix p;
d62a17ae 180 struct bgp_nexthop_cache *bnc;
181 afi_t afi = family2afi(peer->su.sa.sa_family);
35aae5c9 182 ifindex_t ifindex = 0;
d62a17ae 183
184 if (!sockunion2hostprefix(&peer->su, &p))
185 return;
35aae5c9
DS
186 /*
187 * Gather the ifindex for if up/down events to be
188 * tagged into this fun
189 */
190 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
191 ifindex = peer->su.sin6.sin6_scope_id;
192 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0, ifindex);
14315f2d 193 if (!bnc)
d62a17ae 194 return;
195
d62a17ae 196 /* cleanup the peer reference */
197 bnc->nht_info = NULL;
198
199 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
200}
201
960035b2
PZ
202/*
203 * A route and its nexthop might belong to different VRFs. Therefore,
204 * we need both the bgp_route and bgp_nexthop pointers.
205 */
206int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
4053e952 207 afi_t afi, safi_t safi, struct bgp_path_info *pi,
654a5978
PG
208 struct peer *peer, int connected,
209 const struct prefix *orig_prefix)
fb018d25 210{
f663c581 211 struct bgp_nexthop_cache_head *tree = NULL;
d62a17ae 212 struct bgp_nexthop_cache *bnc;
213 struct prefix p;
545aeef1 214 uint32_t srte_color = 0;
d62a17ae 215 int is_bgp_static_route = 0;
8761cd6d 216 ifindex_t ifindex = 0;
d62a17ae 217
40381db7
DS
218 if (pi) {
219 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
220 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 221 ? 1
222 : 0;
223
224 /* Since Extended Next-hop Encoding (RFC5549) support, we want
225 to derive
226 address-family from the next-hop. */
227 if (!is_bgp_static_route)
7226bc40
TA
228 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
229 : AFI_IP;
d62a17ae 230
92d6f769
K
231 /* Validation for the ipv4 mapped ipv6 nexthop. */
232 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
233 afi = AFI_IP;
234 }
235
2951a7a4 236 /* This will return true if the global IPv6 NH is a link local
d62a17ae 237 * addr */
40381db7 238 if (make_prefix(afi, pi, &p) < 0)
d62a17ae 239 return 1;
545aeef1 240
654a5978
PG
241 if (!is_bgp_static_route && orig_prefix
242 && prefix_same(&p, orig_prefix)) {
243 if (BGP_DEBUG(nht, NHT)) {
244 zlog_debug(
245 "%s(%pFX): prefix loops through itself",
246 __func__, &p);
247 }
248 return 0;
249 }
250
545aeef1 251 srte_color = pi->attr->srte_color;
d62a17ae 252 } else if (peer) {
8761cd6d
DS
253 /*
254 * Gather the ifindex for if up/down events to be
255 * tagged into this fun
256 */
35aae5c9
DS
257 if (afi == AFI_IP6 &&
258 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
8761cd6d 259 ifindex = peer->su.sin6.sin6_scope_id;
35aae5c9
DS
260 if (ifindex == 0) {
261 if (BGP_DEBUG(nht, NHT)) {
262 zlog_debug(
263 "%s: Unable to locate ifindex, waiting till we have one",
264 peer->conf_if);
265 }
266 return 0;
267 }
268 }
8761cd6d 269
d62a17ae 270 if (!sockunion2hostprefix(&peer->su, &p)) {
271 if (BGP_DEBUG(nht, NHT)) {
272 zlog_debug(
273 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
15569c58 274 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 275 }
276 return 0;
277 }
278 } else
279 return 0;
280
281 if (is_bgp_static_route)
f663c581 282 tree = &bgp_nexthop->import_check_table[afi];
d62a17ae 283 else
f663c581 284 tree = &bgp_nexthop->nexthop_cache_table[afi];
d62a17ae 285
35aae5c9 286 bnc = bnc_find(tree, &p, srte_color, ifindex);
14315f2d 287 if (!bnc) {
35aae5c9 288 bnc = bnc_new(tree, &p, srte_color, ifindex);
960035b2 289 bnc->bgp = bgp_nexthop;
036f482f
DA
290 if (BGP_DEBUG(nht, NHT))
291 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
292 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 293 bnc->bgp->name_pretty, peer);
4115b296 294 } else {
036f482f 295 if (BGP_DEBUG(nht, NHT))
4115b296 296 zlog_debug(
036f482f
DA
297 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
298 &bnc->prefix, bnc->ifindex,
299 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
300 bnc->path_count, bnc->nht_info);
fc9a856f 301 }
d62a17ae 302
021b6596
AD
303 if (pi && is_route_parent_evpn(pi))
304 bnc->is_evpn_gwip_nexthop = true;
305
d62a17ae 306 if (is_bgp_static_route) {
307 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
308
309 /* If we're toggling the type, re-register */
892fedb6 310 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
d62a17ae 311 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
312 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
313 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
314 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
892fedb6
DA
315 } else if ((!CHECK_FLAG(bgp_route->flags,
316 BGP_FLAG_IMPORT_CHECK))
d62a17ae 317 && CHECK_FLAG(bnc->flags,
318 BGP_STATIC_ROUTE_EXACT_MATCH)) {
319 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
320 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
321 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
322 }
078430f6 323 }
d62a17ae 324 /* When nexthop is already known, but now requires 'connected'
325 * resolution,
326 * re-register it. The reverse scenario where the nexthop currently
327 * requires
328 * 'connected' resolution does not need a re-register (i.e., we treat
329 * 'connected-required' as an override) except in the scenario where
330 * this
331 * is actually a case of tracking a peer for connectivity (e.g., after
332 * disable connected-check).
333 * NOTE: We don't track the number of paths separately for 'connected-
334 * required' vs 'connected-not-required' as this change is not a common
335 * scenario.
336 */
337 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
338 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
339 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
340 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
341 } else if (peer && !connected
342 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
343 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
344 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
345 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
078430f6 346 }
4115b296 347 if (peer && (bnc->ifindex != ifindex)) {
348 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
349 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
350 bnc->ifindex = ifindex;
351 }
960035b2 352 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
1ee0a2df
DS
353 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
354 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
b54892e0 355 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
f663c581 356 && !is_default_host_route(&bnc->prefix))
23f60ffd 357 register_zebra_rnh(bnc);
1eb6c3ea 358
40381db7 359 if (pi && pi->nexthop != bnc) {
d62a17ae 360 /* Unlink from existing nexthop cache, if any. This will also
361 * free
362 * the nexthop cache entry, if appropriate.
363 */
40381db7 364 bgp_unlink_nexthop(pi);
d62a17ae 365
7f040da1
DS
366 /* updates NHT pi list reference */
367 path_nh_map(pi, bnc, true);
d62a17ae 368
369 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
40381db7
DS
370 (bgp_path_info_extra_get(pi))->igpmetric = bnc->metric;
371 else if (pi->extra)
372 pi->extra->igpmetric = 0;
996319e6
DS
373 } else if (peer) {
374 /*
4667220e 375 * Let's not accidentally save the peer data for a peer
996319e6
DS
376 * we are going to throw away in a second or so.
377 * When we come back around we'll fix up this
378 * data properly in replace_nexthop_by_peer
379 */
380 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
381 bnc->nht_info = (void *)peer; /* NHT peer reference */
382 }
d62a17ae 383
384 /*
385 * We are cheating here. Views have no associated underlying
386 * ability to detect nexthops. So when we have a view
387 * just tell everyone the nexthop is valid
388 */
960035b2 389 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
d62a17ae 390 return 1;
1bb550b6
PG
391 else if (safi == SAFI_UNICAST && pi &&
392 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
393 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
394 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
395 else
d62a17ae 396 return (bgp_isvalid_nexthop(bnc));
fb018d25
DS
397}
398
d62a17ae 399void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
9a233a02 400{
d62a17ae 401 struct bgp_nexthop_cache *bnc;
402 struct prefix p;
35aae5c9 403 ifindex_t ifindex = 0;
d62a17ae 404
405 if (!peer)
406 return;
407
d62a17ae 408 if (!sockunion2hostprefix(&peer->su, &p))
409 return;
35aae5c9
DS
410 /*
411 * Gather the ifindex for if up/down events to be
412 * tagged into this fun
413 */
414 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
415 ifindex = peer->su.sin6.sin6_scope_id;
f663c581 416 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
35aae5c9 417 &p, 0, ifindex);
14315f2d
DS
418 if (!bnc) {
419 if (BGP_DEBUG(nht, NHT))
8c1a4c10 420 zlog_debug(
f663c581 421 "Cannot find connected NHT node for peer %s(%s)",
8c1a4c10 422 peer->host, peer->bgp->name_pretty);
14315f2d
DS
423 return;
424 }
d62a17ae 425
426 if (bnc->nht_info != peer) {
427 if (BGP_DEBUG(nht, NHT))
428 zlog_debug(
8c1a4c10
DS
429 "Connected NHT %p node for peer %s(%s) points to %p",
430 bnc, peer->host, bnc->bgp->name_pretty,
431 bnc->nht_info);
d62a17ae 432 return;
433 }
434
435 bnc->nht_info = NULL;
436
437 if (LIST_EMPTY(&(bnc->paths))) {
438 if (BGP_DEBUG(nht, NHT))
8c1a4c10
DS
439 zlog_debug(
440 "Freeing connected NHT node %p for peer %s(%s)",
441 bnc, peer->host, bnc->bgp->name_pretty);
23f60ffd 442 unregister_zebra_rnh(bnc);
d62a17ae 443 bnc_free(bnc);
444 }
9a233a02
DS
445}
446
545aeef1 447static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
9f002fa5
DS
448 struct zapi_route *nhr,
449 bool import_check)
fb018d25 450{
d62a17ae 451 struct nexthop *nexthop;
452 struct nexthop *oldnh;
453 struct nexthop *nhlist_head = NULL;
454 struct nexthop *nhlist_tail = NULL;
d62a17ae 455 int i;
021b6596 456 bool evpn_resolved = false;
14315f2d 457
083ec940 458 bnc->last_update = monotime(NULL);
d62a17ae 459 bnc->change_flags = 0;
d62a17ae 460
461 /* debug print the input */
987a720a
DS
462 if (BGP_DEBUG(nht, NHT)) {
463 char bnc_buf[BNC_FLAG_DUMP_SIZE];
464
d62a17ae 465 zlog_debug(
35aae5c9 466 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
2dbe669b 467 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
35aae5c9 468 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
987a720a
DS
469 nhr->nexthop_num, bnc->nexthop_num,
470 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
471 sizeof(bnc_buf)));
472 }
d62a17ae 473
545aeef1 474 if (nhr->metric != bnc->metric)
d62a17ae 475 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
476
545aeef1 477 if (nhr->nexthop_num != bnc->nexthop_num)
d62a17ae 478 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
479
9f002fa5
DS
480 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
481 !prefix_same(&bnc->prefix, &nhr->prefix))) {
482 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
483 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
484 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
485 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
486
487 bnc_nexthop_free(bnc);
488 bnc->nexthop = NULL;
489
490 if (BGP_DEBUG(nht, NHT))
491 zlog_debug(
492 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
493 __func__, &bnc->prefix, &nhr->prefix);
494 } else if (nhr->nexthop_num) {
6137a77d
DS
495 struct peer *peer = bnc->nht_info;
496
d62a17ae 497 /* notify bgp fsm if nbr ip goes from invalid->valid */
498 if (!bnc->nexthop_num)
499 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
500
021b6596
AD
501 if (!bnc->is_evpn_gwip_nexthop)
502 bnc->flags |= BGP_NEXTHOP_VALID;
545aeef1
RW
503 bnc->metric = nhr->metric;
504 bnc->nexthop_num = nhr->nexthop_num;
4a749e2c 505
960035b2
PZ
506 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
507
545aeef1 508 for (i = 0; i < nhr->nexthop_num; i++) {
960035b2
PZ
509 int num_labels = 0;
510
545aeef1 511 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
d62a17ae 512
6137a77d
DS
513 /*
514 * Turn on RA for the v6 nexthops
515 * we receive from bgp. This is to allow us
516 * to work with v4 routing over v6 nexthops
517 */
687a2b5d
DS
518 if (peer && !peer->ifp
519 && CHECK_FLAG(peer->flags,
520 PEER_FLAG_CAPABILITY_ENHE)
545aeef1 521 && nhr->prefix.family == AF_INET6
65f803e8 522 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
6137a77d
DS
523 struct interface *ifp;
524
525 ifp = if_lookup_by_index(nexthop->ifindex,
526 nexthop->vrf_id);
8c9769e0
DS
527 if (ifp)
528 zclient_send_interface_radv_req(
529 zclient, nexthop->vrf_id, ifp,
530 true,
531 BGP_UNNUM_DEFAULT_RA_INTERVAL);
6137a77d 532 }
960035b2
PZ
533 /* There is at least one label-switched path */
534 if (nexthop->nh_label &&
535 nexthop->nh_label->num_labels) {
536
537 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
538 num_labels = nexthop->nh_label->num_labels;
539 }
540
d62a17ae 541 if (BGP_DEBUG(nht, NHT)) {
542 char buf[NEXTHOP_STRLEN];
543 zlog_debug(
960035b2
PZ
544 " nhop via %s (%d labels)",
545 nexthop2str(nexthop, buf, sizeof(buf)),
546 num_labels);
d62a17ae 547 }
548
549 if (nhlist_tail) {
550 nhlist_tail->next = nexthop;
551 nhlist_tail = nexthop;
552 } else {
553 nhlist_tail = nexthop;
554 nhlist_head = nexthop;
555 }
556
557 /* No need to evaluate the nexthop if we have already
558 * determined
559 * that there has been a change.
560 */
561 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
562 continue;
563
564 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
78fba41b 565 if (nexthop_same(oldnh, nexthop))
d62a17ae 566 break;
567
568 if (!oldnh)
569 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
570 }
571 bnc_nexthop_free(bnc);
572 bnc->nexthop = nhlist_head;
021b6596
AD
573
574 /*
575 * Gateway IP nexthop is L3 reachable. Mark it as
576 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
577 * remote EVPN RT-2.
578 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
579 * When its mapping with EVPN RT-2 is established, unset
580 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
581 */
582 if (bnc->is_evpn_gwip_nexthop) {
583 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
584
511211bf 585 if (BGP_DEBUG(nht, NHT))
021b6596 586 zlog_debug(
511211bf
DA
587 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
588 &bnc->prefix,
021b6596
AD
589 (evpn_resolved ? "successful"
590 : "failed"));
021b6596
AD
591
592 if (evpn_resolved) {
593 bnc->flags |= BGP_NEXTHOP_VALID;
594 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
595 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
596 } else {
597 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
598 bnc->flags &= ~BGP_NEXTHOP_VALID;
599 }
600 }
d62a17ae 601 } else {
021b6596 602 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
d62a17ae 603 bnc->flags &= ~BGP_NEXTHOP_VALID;
c3b95419 604 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
545aeef1 605 bnc->nexthop_num = nhr->nexthop_num;
d62a17ae 606
607 /* notify bgp fsm if nbr ip goes from valid->invalid */
608 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
609
610 bnc_nexthop_free(bnc);
611 bnc->nexthop = NULL;
612 }
613
614 evaluate_paths(bnc);
fb018d25
DS
615}
616
8761cd6d
DS
617static void bgp_nht_ifp_table_handle(struct bgp *bgp,
618 struct bgp_nexthop_cache_head *table,
619 struct interface *ifp, bool up)
620{
621 struct bgp_nexthop_cache *bnc;
622
623 frr_each (bgp_nexthop_cache, table, bnc) {
624 if (bnc->ifindex != ifp->ifindex)
625 continue;
626
083ec940 627 bnc->last_update = monotime(NULL);
8761cd6d
DS
628 bnc->change_flags = 0;
629
474cfe4a
DS
630 /*
631 * For interface based routes ( ala the v6 LL routes
632 * that this was written for ) the metric received
633 * for the connected route is 0 not 1.
634 */
635 bnc->metric = 0;
8761cd6d
DS
636 if (up) {
637 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
638 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
8761cd6d
DS
639 bnc->nexthop_num = 1;
640 } else {
641 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
642 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
643 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
644 bnc->nexthop_num = 0;
8761cd6d
DS
645 }
646
647 evaluate_paths(bnc);
648 }
649}
650static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
651{
652 struct bgp *bgp;
653
096f7609 654 bgp = ifp->vrf->info;
8761cd6d
DS
655 if (!bgp)
656 return;
657
7f2e9cce
DS
658 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
659 up);
660 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
661 up);
8761cd6d
DS
662 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
663 up);
664 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
665 up);
666}
667
668void bgp_nht_ifp_up(struct interface *ifp)
669{
670 bgp_nht_ifp_handle(ifp, true);
671}
672
673void bgp_nht_ifp_down(struct interface *ifp)
674{
675 bgp_nht_ifp_handle(ifp, false);
676}
677
cc9f21da 678static void bgp_nht_ifp_initial(struct thread *thread)
8761cd6d
DS
679{
680 ifindex_t ifindex = THREAD_VAL(thread);
0b52b75a
IR
681 struct bgp *bgp = THREAD_ARG(thread);
682 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
8761cd6d
DS
683
684 if (!ifp)
cc9f21da 685 return;
8761cd6d 686
4115b296 687 if (BGP_DEBUG(nht, NHT))
688 zlog_debug(
689 "Handle NHT initial update for Intf %s(%d) status %s",
690 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
691
8761cd6d
DS
692 if (if_is_up(ifp))
693 bgp_nht_ifp_up(ifp);
694 else
695 bgp_nht_ifp_down(ifp);
8761cd6d
DS
696}
697
698/*
699 * So the bnc code has the ability to handle interface up/down
700 * events to properly handle v6 LL peering.
701 * What is happening here:
702 * The event system for peering expects the nht code to
703 * report on the tracking events after we move to active
704 * So let's give the system a chance to report on that event
705 * in a manner that is expected.
706 */
707void bgp_nht_interface_events(struct peer *peer)
708{
709 struct bgp *bgp = peer->bgp;
710 struct bgp_nexthop_cache_head *table;
711 struct bgp_nexthop_cache *bnc;
712 struct prefix p;
35aae5c9 713 ifindex_t ifindex = 0;
8761cd6d
DS
714
715 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
716 return;
717
718 if (!sockunion2hostprefix(&peer->su, &p))
719 return;
35aae5c9
DS
720 /*
721 * Gather the ifindex for if up/down events to be
722 * tagged into this fun
723 */
724 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
725 ifindex = peer->su.sin6.sin6_scope_id;
8761cd6d
DS
726
727 table = &bgp->nexthop_cache_table[AFI_IP6];
35aae5c9 728 bnc = bnc_find(table, &p, 0, ifindex);
8761cd6d
DS
729 if (!bnc)
730 return;
731
732 if (bnc->ifindex)
0b52b75a 733 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
8761cd6d
DS
734 bnc->ifindex, NULL);
735}
736
545aeef1
RW
737void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
738{
739 struct bgp_nexthop_cache_head *tree = NULL;
b8210849 740 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
545aeef1 741 struct bgp *bgp;
06e4e901 742 struct prefix match;
545aeef1
RW
743 struct zapi_route nhr;
744 afi_t afi;
745
746 bgp = bgp_lookup_by_vrf_id(vrf_id);
747 if (!bgp) {
748 flog_err(
749 EC_BGP_NH_UPD,
750 "parse nexthop update: instance not found for vrf_id %u",
751 vrf_id);
752 return;
753 }
754
06e4e901 755 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
cc42c4f0
DS
756 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
757 bgp->name_pretty);
545aeef1
RW
758 return;
759 }
760
06e4e901 761 afi = family2afi(match.family);
b8210849 762 tree = &bgp->nexthop_cache_table[afi];
545aeef1 763
35aae5c9 764 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
b8210849
DS
765 if (!bnc_nhc) {
766 if (BGP_DEBUG(nht, NHT))
767 zlog_debug(
768 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
769 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
770 } else
9f002fa5 771 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
b8210849
DS
772
773 tree = &bgp->import_check_table[afi];
774
35aae5c9 775 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
b8210849 776 if (!bnc_import) {
2dbe669b 777 if (BGP_DEBUG(nht, NHT))
545aeef1 778 zlog_debug(
b8210849 779 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
2dbe669b 780 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
d00a5f6b
DS
781 } else
782 bgp_process_nexthop_update(bnc_import, &nhr, true);
545aeef1 783
545aeef1
RW
784 /*
785 * HACK: if any BGP route is dependant on an SR-policy that doesn't
786 * exist, zebra will never send NH updates relative to that policy. In
787 * that case, whenever we receive an update about a colorless NH, update
788 * the corresponding colorful NHs that share the same endpoint but that
789 * are inactive. This ugly hack should work around the problem at the
790 * cost of a performance pernalty. Long term, what should be done is to
791 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
792 * which should provide a better infrastructure to solve this issue in
793 * a more efficient and elegant way.
794 */
b8210849 795 if (nhr.srte_color == 0 && bnc_nhc) {
545aeef1
RW
796 struct bgp_nexthop_cache *bnc_iter;
797
798 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
799 bnc_iter) {
d00a5f6b
DS
800 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
801 bnc_iter->srte_color == 0 ||
802 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
545aeef1
RW
803 continue;
804
9f002fa5 805 bgp_process_nexthop_update(bnc_iter, &nhr, false);
545aeef1
RW
806 }
807 }
808}
809
ee7ca6c0 810/*
811 * Cleanup nexthop registration and status information for BGP nexthops
812 * pertaining to this VRF. This is invoked upon VRF deletion.
813 */
814void bgp_cleanup_nexthops(struct bgp *bgp)
815{
f663c581
RW
816 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
817 struct bgp_nexthop_cache *bnc;
ee7ca6c0 818
f663c581
RW
819 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
820 bnc) {
ee7ca6c0 821 /* Clear relevant flags. */
822 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
823 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
824 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
021b6596 825 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
ee7ca6c0 826 }
827 }
828}
829
fb018d25
DS
830/**
831 * make_prefix - make a prefix structure from the path (essentially
832 * path's node.
833 */
40381db7 834static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
fb018d25 835{
078430f6 836
40381db7
DS
837 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
838 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 839 ? 1
840 : 0;
9bcb3eef
DS
841 struct bgp_dest *net = pi->net;
842 const struct prefix *p_orig = bgp_dest_get_prefix(net);
92d6f769 843 struct in_addr ipv4;
0378bcaa
PG
844
845 if (p_orig->family == AF_FLOWSPEC) {
846 if (!pi->peer)
847 return -1;
848 return bgp_flowspec_get_first_nh(pi->peer->bgp,
1840384b 849 pi, p, afi);
0378bcaa 850 }
d62a17ae 851 memset(p, 0, sizeof(struct prefix));
852 switch (afi) {
853 case AFI_IP:
854 p->family = AF_INET;
855 if (is_bgp_static) {
b54892e0
DS
856 p->u.prefix4 = p_orig->u.prefix4;
857 p->prefixlen = p_orig->prefixlen;
d62a17ae 858 } else {
92d6f769
K
859 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
860 ipv4_mapped_ipv6_to_ipv4(
861 &pi->attr->mp_nexthop_global, &ipv4);
862 p->u.prefix4 = ipv4;
863 p->prefixlen = IPV4_MAX_BITLEN;
864 } else {
7226bc40
TA
865 if (p_orig->family == AF_EVPN)
866 p->u.prefix4 =
867 pi->attr->mp_nexthop_global_in;
868 else
869 p->u.prefix4 = pi->attr->nexthop;
92d6f769
K
870 p->prefixlen = IPV4_MAX_BITLEN;
871 }
d62a17ae 872 }
873 break;
874 case AFI_IP6:
d62a17ae 875 p->family = AF_INET6;
876
877 if (is_bgp_static) {
b54892e0
DS
878 p->u.prefix6 = p_orig->u.prefix6;
879 p->prefixlen = p_orig->prefixlen;
d62a17ae 880 } else {
606fdbb1
DA
881 /* If we receive MP_REACH nexthop with ::(LL)
882 * or LL(LL), use LL address as nexthop cache.
883 */
884 if (pi->attr->mp_nexthop_len
885 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
886 && (IN6_IS_ADDR_UNSPECIFIED(
887 &pi->attr->mp_nexthop_global)
888 || IN6_IS_ADDR_LINKLOCAL(
889 &pi->attr->mp_nexthop_global)))
890 p->u.prefix6 = pi->attr->mp_nexthop_local;
17ef5a93
PG
891 /* If we receive MR_REACH with (GA)::(LL)
892 * then check for route-map to choose GA or LL
893 */
894 else if (pi->attr->mp_nexthop_len
895 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
896 if (pi->attr->mp_nexthop_prefer_global)
897 p->u.prefix6 =
898 pi->attr->mp_nexthop_global;
899 else
900 p->u.prefix6 =
901 pi->attr->mp_nexthop_local;
902 } else
606fdbb1 903 p->u.prefix6 = pi->attr->mp_nexthop_global;
d62a17ae 904 p->prefixlen = IPV6_MAX_BITLEN;
905 }
906 break;
907 default:
908 if (BGP_DEBUG(nht, NHT)) {
909 zlog_debug(
910 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
15569c58 911 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 912 }
913 break;
65740e1b 914 }
d62a17ae 915 return 0;
fb018d25
DS
916}
917
918/**
078430f6 919 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
fb018d25
DS
920 * command to Zebra.
921 * ARGUMENTS:
922 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
078430f6 923 * int command -- command to send to zebra
fb018d25
DS
924 * RETURNS:
925 * void.
926 */
d62a17ae 927static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
fb018d25 928{
3c192540 929 bool exact_match = false;
ed6cec97 930 bool resolve_via_default = false;
d62a17ae 931 int ret;
932
3c192540 933 if (!zclient)
d62a17ae 934 return;
935
936 /* Don't try to register if Zebra doesn't know of this instance. */
bb4ef1ae
DS
937 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
938 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
939 zlog_debug(
940 "%s: No zebra instance to talk to, not installing NHT entry",
941 __func__);
d62a17ae 942 return;
bb4ef1ae 943 }
d62a17ae 944
1ee0a2df
DS
945 if (!bgp_zebra_num_connects()) {
946 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
947 zlog_debug(
948 "%s: We have not connected yet, cannot send nexthops",
949 __func__);
1ee0a2df 950 }
ed6cec97
DS
951 if (command == ZEBRA_NEXTHOP_REGISTER) {
952 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
953 exact_match = true;
954 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
955 resolve_via_default = true;
956 }
d62a17ae 957
f663c581
RW
958 if (BGP_DEBUG(zebra, ZEBRA))
959 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
960 zserv_command_string(command), &bnc->prefix,
961 bnc->bgp->name_pretty);
960035b2 962
eb3c9d97
DL
963 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
964 exact_match, resolve_via_default,
965 bnc->bgp->vrf_id);
a6522038 966 if (ret == ZCLIENT_SEND_FAILURE) {
e50f7cfd 967 flog_warn(EC_BGP_ZEBRA_SEND,
f162a5b9 968 "sendmsg_nexthop: zclient_send_message() failed");
a6522038 969 return;
970 }
d62a17ae 971
3d174ce0 972 if (command == ZEBRA_NEXTHOP_REGISTER)
d62a17ae 973 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
3d174ce0 974 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
d62a17ae 975 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
976 return;
fb018d25
DS
977}
978
979/**
078430f6
DS
980 * register_zebra_rnh - register a NH/route with Zebra for notification
981 * when the route or the route to the nexthop changes.
fb018d25 982 * ARGUMENTS:
078430f6 983 * struct bgp_nexthop_cache *bnc
fb018d25
DS
984 * RETURNS:
985 * void.
986 */
23f60ffd 987static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
fb018d25 988{
d62a17ae 989 /* Check if we have already registered */
990 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
991 return;
8761cd6d
DS
992
993 if (bnc->ifindex) {
994 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
995 return;
996 }
997
3d174ce0 998 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
fb018d25
DS
999}
1000
1001/**
078430f6 1002 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
fb018d25 1003 * ARGUMENTS:
078430f6 1004 * struct bgp_nexthop_cache *bnc
fb018d25
DS
1005 * RETURNS:
1006 * void.
1007 */
23f60ffd 1008static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
fb018d25 1009{
d62a17ae 1010 /* Check if we have already registered */
1011 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1012 return;
1013
8761cd6d
DS
1014 if (bnc->ifindex) {
1015 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1016 return;
1017 }
1018
3d174ce0 1019 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
fb018d25
DS
1020}
1021
1022/**
1023 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1024 * ARGUMENTS:
1025 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1026 * RETURNS:
1027 * void.
1028 */
021b6596 1029void evaluate_paths(struct bgp_nexthop_cache *bnc)
fb018d25 1030{
9bcb3eef 1031 struct bgp_dest *dest;
4b7e6066 1032 struct bgp_path_info *path;
d62a17ae 1033 int afi;
1034 struct peer *peer = (struct peer *)bnc->nht_info;
1035 struct bgp_table *table;
1036 safi_t safi;
960035b2 1037 struct bgp *bgp_path;
b54892e0 1038 const struct prefix *p;
d62a17ae 1039
1040 if (BGP_DEBUG(nht, NHT)) {
987a720a 1041 char bnc_buf[BNC_FLAG_DUMP_SIZE];
df2a41a9 1042 char chg_buf[BNC_FLAG_DUMP_SIZE];
987a720a 1043
d62a17ae 1044 zlog_debug(
036f482f
DA
1045 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1046 &bnc->prefix, bnc->ifindex, bnc->srte_color,
35aae5c9 1047 bnc->bgp->name_pretty,
df2a41a9
DS
1048 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1049 sizeof(bnc_buf)),
1050 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1051 sizeof(bnc_buf)));
fb018d25
DS
1052 }
1053
a2addae8 1054 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
d62a17ae 1055 if (!(path->type == ZEBRA_ROUTE_BGP
1056 && ((path->sub_type == BGP_ROUTE_NORMAL)
960035b2
PZ
1057 || (path->sub_type == BGP_ROUTE_STATIC)
1058 || (path->sub_type == BGP_ROUTE_IMPORTED))))
d62a17ae 1059 continue;
1060
9bcb3eef
DS
1061 dest = path->net;
1062 assert(dest && bgp_dest_table(dest));
1063 p = bgp_dest_get_prefix(dest);
b54892e0 1064 afi = family2afi(p->family);
9bcb3eef 1065 table = bgp_dest_table(dest);
d62a17ae 1066 safi = table->safi;
1067
960035b2
PZ
1068 /*
1069 * handle routes from other VRFs (they can have a
1070 * nexthop in THIS VRF). bgp_path is the bgp instance
1071 * that owns the route referencing this nexthop.
1072 */
1073 bgp_path = table->bgp;
1074
1075 /*
1076 * Path becomes valid/invalid depending on whether the nexthop
d62a17ae 1077 * reachable/unreachable.
960035b2
PZ
1078 *
1079 * In case of unicast routes that were imported from vpn
1080 * and that have labels, they are valid only if there are
1081 * nexthops with labels
a2299aba
AD
1082 *
1083 * If the nexthop is EVPN gateway-IP,
1084 * do not check for a valid label.
d62a17ae 1085 */
960035b2 1086
34ea39b6 1087 bool bnc_is_valid_nexthop = false;
1088 bool path_valid = false;
960035b2 1089
021b6596
AD
1090 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1091 && path->extra && path->extra->num_labels
1092 && (path->attr->evpn_overlay.type
1093 != OVERLAY_INDEX_GATEWAY_IP)) {
960035b2 1094 bnc_is_valid_nexthop =
1bb550b6
PG
1095 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1096 : false;
960035b2 1097 } else {
e7cbe5e5
NT
1098 if (bgp_update_martian_nexthop(
1099 bnc->bgp, afi, safi, path->type,
9bcb3eef 1100 path->sub_type, path->attr, dest)) {
e7cbe5e5
NT
1101 if (BGP_DEBUG(nht, NHT))
1102 zlog_debug(
56ca3b5b 1103 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
9bcb3eef 1104 __func__, dest, bgp_path->name);
e7cbe5e5
NT
1105 } else
1106 bnc_is_valid_nexthop =
34ea39b6 1107 bgp_isvalid_nexthop(bnc) ? true : false;
960035b2
PZ
1108 }
1109
9e15d76a 1110 if (BGP_DEBUG(nht, NHT)) {
1111 char buf1[RD_ADDRSTRLEN];
960035b2 1112
9bcb3eef
DS
1113 if (dest->pdest) {
1114 prefix_rd2str((struct prefix_rd *)bgp_dest_get_prefix(dest->pdest),
9e15d76a 1115 buf1, sizeof(buf1));
1116 zlog_debug(
56ca3b5b 1117 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
9bcb3eef 1118 afi, safi, dest, buf1,
9e15d76a 1119 bgp_path->name_pretty, path->flags);
1120 } else
1121 zlog_debug(
56ca3b5b 1122 "... eval path %d/%d %pBD %s flags 0x%x",
9bcb3eef 1123 afi, safi, dest, bgp_path->name_pretty,
9e15d76a 1124 path->flags);
1125 }
d62a17ae 1126
0139efe0 1127 /* Skip paths marked for removal or as history. */
1128 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1129 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1130 continue;
1131
d62a17ae 1132 /* Copy the metric to the path. Will be used for bestpath
1133 * computation */
1134 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
18ee8310
DS
1135 (bgp_path_info_extra_get(path))->igpmetric =
1136 bnc->metric;
d62a17ae 1137 else if (path->extra)
1138 path->extra->igpmetric = 0;
1139
1140 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
545aeef1
RW
1141 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1142 || path->attr->srte_color != 0)
1defdda8 1143 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
d62a17ae 1144
d4980edf 1145 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
34ea39b6 1146 if (path_valid != bnc_is_valid_nexthop) {
1147 if (path_valid) {
1148 /* No longer valid, clear flag; also for EVPN
1149 * routes, unimport from VRFs if needed.
1150 */
1151 bgp_aggregate_decrement(bgp_path, p, path, afi,
1152 safi);
9bcb3eef 1153 bgp_path_info_unset_flag(dest, path,
34ea39b6 1154 BGP_PATH_VALID);
1155 if (safi == SAFI_EVPN &&
9bcb3eef 1156 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 1157 bgp_evpn_unimport_route(bgp_path,
9bcb3eef 1158 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 1159 } else {
1160 /* Path becomes valid, set flag; also for EVPN
1161 * routes, import from VRFs if needed.
1162 */
9bcb3eef 1163 bgp_path_info_set_flag(dest, path,
34ea39b6 1164 BGP_PATH_VALID);
1165 bgp_aggregate_increment(bgp_path, p, path, afi,
1166 safi);
1167 if (safi == SAFI_EVPN &&
9bcb3eef 1168 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 1169 bgp_evpn_import_route(bgp_path,
9bcb3eef 1170 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 1171 }
7c312383
AD
1172 }
1173
9bcb3eef 1174 bgp_process(bgp_path, dest, afi, safi);
d62a17ae 1175 }
fc9a856f 1176
1e91f1d1
DS
1177 if (peer) {
1178 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1179
824065c4
DS
1180 if (valid_nexthops) {
1181 /*
1182 * Peering cannot occur across a blackhole nexthop
1183 */
e817f2cc 1184 if (bnc->nexthop_num == 1 && bnc->nexthop
824065c4
DS
1185 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1186 peer->last_reset = PEER_DOWN_WAITING_NHT;
1187 valid_nexthops = 0;
1188 } else
1189 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1190 } else
1e91f1d1
DS
1191 peer->last_reset = PEER_DOWN_WAITING_NHT;
1192
1193 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1194 if (BGP_DEBUG(nht, NHT))
15569c58 1195 zlog_debug(
8761cd6d 1196 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
15569c58 1197 __func__, peer->host,
8761cd6d
DS
1198 peer->bgp->name_pretty,
1199 !!valid_nexthops);
f8dcd38d 1200 bgp_fsm_nht_update(peer, !!valid_nexthops);
1e91f1d1
DS
1201 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1202 }
d62a17ae 1203 }
fc9a856f 1204
d62a17ae 1205 RESET_FLAG(bnc->change_flags);
fb018d25
DS
1206}
1207
1208/**
1209 * path_nh_map - make or break path-to-nexthop association.
1210 * ARGUMENTS:
1211 * path - pointer to the path structure
1212 * bnc - pointer to the nexthop structure
1213 * make - if set, make the association. if unset, just break the existing
1214 * association.
1215 */
7f040da1
DS
1216void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1217 bool make)
fb018d25 1218{
d62a17ae 1219 if (path->nexthop) {
1220 LIST_REMOVE(path, nh_thread);
1221 path->nexthop->path_count--;
1222 path->nexthop = NULL;
1223 }
1224 if (make) {
1225 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1226 path->nexthop = bnc;
1227 path->nexthop->path_count++;
1228 }
fb018d25 1229}
1ee0a2df
DS
1230
1231/*
1232 * This function is called to register nexthops to zebra
1233 * as that we may have tried to install the nexthops
1234 * before we actually have a zebra connection
1235 */
1236void bgp_nht_register_nexthops(struct bgp *bgp)
1237{
f663c581
RW
1238 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1239 struct bgp_nexthop_cache *bnc;
1ee0a2df 1240
f663c581
RW
1241 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1242 bnc) {
23f60ffd 1243 register_zebra_rnh(bnc);
1ee0a2df
DS
1244 }
1245 }
1246}
1ea03b90 1247
b3a3290e 1248void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1ea03b90
DS
1249{
1250 struct bgp *bgp;
1ea03b90
DS
1251 struct bgp_nexthop_cache *bnc;
1252 struct nexthop *nhop;
1253 struct interface *ifp;
1254 struct prefix p;
35aae5c9 1255 ifindex_t ifindex = 0;
1ea03b90
DS
1256
1257 if (peer->ifp)
1258 return;
1259
1260 bgp = peer->bgp;
1ea03b90 1261 if (!sockunion2hostprefix(&peer->su, &p)) {
b3a3290e
DS
1262 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1263 __func__, peer->host);
1ea03b90
DS
1264 return;
1265 }
1266
1267 if (p.family != AF_INET6)
1268 return;
35aae5c9
DS
1269 /*
1270 * Gather the ifindex for if up/down events to be
1271 * tagged into this fun
1272 */
1273 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1274 ifindex = peer->su.sin6.sin6_scope_id;
1ea03b90 1275
35aae5c9 1276 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1ea03b90
DS
1277 if (!bnc)
1278 return;
1279
1280 if (peer != bnc->nht_info)
1281 return;
1282
1283 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
8c9769e0
DS
1284 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1285
1286 if (!ifp)
1287 continue;
1288
1ea03b90
DS
1289 zclient_send_interface_radv_req(zclient,
1290 nhop->vrf_id,
1291 ifp, true,
1292 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1293 }
1294}
b3a3290e
DS
1295
1296void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1297{
1298 struct bgp *bgp;
b3a3290e
DS
1299 struct bgp_nexthop_cache *bnc;
1300 struct nexthop *nhop;
1301 struct interface *ifp;
1302 struct prefix p;
35aae5c9 1303 ifindex_t ifindex = 0;
b3a3290e
DS
1304
1305 if (peer->ifp)
1306 return;
1307
1308 bgp = peer->bgp;
1309
b3a3290e
DS
1310 if (!sockunion2hostprefix(&peer->su, &p)) {
1311 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1312 __func__, peer->host);
1313 return;
1314 }
1315
1316 if (p.family != AF_INET6)
1317 return;
35aae5c9
DS
1318 /*
1319 * Gather the ifindex for if up/down events to be
1320 * tagged into this fun
1321 */
1322 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1323 ifindex = peer->su.sin6.sin6_scope_id;
b3a3290e 1324
35aae5c9 1325 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
b3a3290e
DS
1326 if (!bnc)
1327 return;
1328
1329 if (peer != bnc->nht_info)
1330 return;
1331
1332 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1333 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1334
68cecc3b
DS
1335 if (!ifp)
1336 continue;
1337
b3a3290e
DS
1338 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1339 0);
1340 }
1341}
c589d847
AK
1342
1343/****************************************************************************
1344 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1345 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1346 * left to the application using it.
1347 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1348 * failover of remote ES links.
1349 ***************************************************************************/
1350static bitfield_t bgp_nh_id_bitmap;
8bcb09a1 1351static uint32_t bgp_l3nhg_start;
c589d847 1352
8bcb09a1
AK
/*
 * XXX - currently we do nothing on the callbacks.  The four functions
 * below are intentionally empty.
 */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* no-op */
}

static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* no-op */
}

static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* no-op */
}

static void bgp_l3nhg_del_cb(const char *name)
{
	/* no-op */
}
1368
8bcb09a1 1369static void bgp_l3nhg_zebra_init(void)
c589d847 1370{
8bcb09a1
AK
1371 static bool bgp_l3nhg_zebra_inited;
1372 if (bgp_l3nhg_zebra_inited)
c589d847
AK
1373 return;
1374
8bcb09a1
AK
1375 bgp_l3nhg_zebra_inited = true;
1376 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1377 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_add_nexthop_cb,
1378 bgp_l3nhg_del_nexthop_cb, bgp_l3nhg_del_cb);
c589d847
AK
1379}
1380
8bcb09a1 1381
c589d847
AK
1382void bgp_l3nhg_init(void)
1383{
8bcb09a1
AK
1384 uint32_t id_max;
1385
7a8ce9d5 1386 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
8bcb09a1 1387 bf_init(bgp_nh_id_bitmap, id_max);
c589d847 1388 bf_assign_zero_index(bgp_nh_id_bitmap);
8bcb09a1
AK
1389
1390 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1391 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1392 bgp_l3nhg_start + id_max);
c589d847
AK
1393}
1394
1395void bgp_l3nhg_finish(void)
1396{
1397 bf_free(bgp_nh_id_bitmap);
1398}
8bcb09a1
AK
1399
1400uint32_t bgp_l3nhg_id_alloc(void)
1401{
1402 uint32_t nhg_id = 0;
1403
1404 bgp_l3nhg_zebra_init();
1405 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1406 if (nhg_id)
1407 nhg_id += bgp_l3nhg_start;
1408
1409 return nhg_id;
1410}
1411
1412void bgp_l3nhg_id_free(uint32_t nhg_id)
1413{
1414 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1415 return;
1416
1417 nhg_id -= bgp_l3nhg_start;
1418
1419 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1420}