]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
Merge pull request #12642 from anlancs/fix/bgpd-debug-name
[mirror_frr.git] / bgpd / bgp_nht.c
1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "thread.h"
25 #include "prefix.h"
26 #include "zclient.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "nexthop.h"
32 #include "vrf.h"
33 #include "filter.h"
34 #include "nexthop_group.h"
35
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49
50 extern struct zclient *zclient;
51
52 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
53 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
54 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
55 static void bgp_nht_ifp_initial(struct thread *thread);
56
57 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
58 {
59 return (bgp_zebra_num_connects() == 0
60 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 && bnc->nexthop_num > 0));
62 }
63
64 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
65 struct bgp_path_info *path)
66 {
67 struct interface *ifp = NULL;
68 struct nexthop *nexthop;
69 struct bgp_interface *iifp;
70 struct peer *peer;
71
72 if (!path->extra || !path->extra->peer_orig)
73 return false;
74
75 peer = path->extra->peer_orig;
76
77 /* only connected ebgp peers are valid */
78 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
79 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
80 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
81 return false;
82
83 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
84 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
85 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
86 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
87 ifp = if_lookup_by_index(
88 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
89 bnc->bgp->vrf_id);
90 }
91 if (!ifp)
92 continue;
93 iifp = ifp->info;
94 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
95 return true;
96 }
97 return false;
98 }
99
100 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
101 struct bgp_path_info *path)
102 {
103 struct interface *ifp = NULL;
104 struct nexthop *nexthop;
105
106 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
107 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
108 ifp = if_lookup_by_index(
109 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
110 bnc->bgp->vrf_id);
111 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
112 ifp->ll_type == ZEBRA_LLT_IP6GRE))
113 break;
114 }
115 }
116 if (!ifp)
117 return false;
118
119 if (CHECK_FLAG(path->attr->rmap_change_flags,
120 BATTR_RMAP_L3VPN_ACCEPT_GRE))
121 return true;
122
123 return false;
124 }
125
126 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
127 struct bgp_path_info *path)
128 {
129 /*
130 * - In the case of MPLS-VPN, the label is learned from LDP or other
131 * protocols, and nexthop tracking is enabled for the label.
132 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
133 * - In the case of SRv6-VPN, we need to track the reachability to the
134 * SID (in other words, IPv6 address). As in MPLS, we need to record
135 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
136 * currently not implemented, and this function assumes that all
137 * Transit routes for SRv6-VPN are valid.
138 * - Otherwise check for mpls-gre acceptance
139 */
140 return (bgp_zebra_num_connects() == 0 ||
141 (bnc && (bnc->nexthop_num > 0 &&
142 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
143 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
144 bnc->bgp->srv6_enabled ||
145 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
146 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
147 }
148
149 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
150 {
151 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
152 if (BGP_DEBUG(nht, NHT))
153 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
154 &bnc->prefix, bnc->ifindex, bnc->srte_color,
155 bnc->bgp->name_pretty);
156 /* only unregister if this is the last nh for this prefix*/
157 if (!bnc_existing_for_prefix(bnc))
158 unregister_zebra_rnh(bnc);
159 bnc_free(bnc);
160 }
161 }
162
163 void bgp_unlink_nexthop(struct bgp_path_info *path)
164 {
165 struct bgp_nexthop_cache *bnc = path->nexthop;
166
167 if (!bnc)
168 return;
169
170 path_nh_map(path, NULL, false);
171
172 bgp_unlink_nexthop_check(bnc);
173 }
174
175 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
176 {
177 struct prefix pp;
178 struct prefix pt;
179 struct bgp_nexthop_cache *bncp, *bnct;
180 afi_t afi;
181 ifindex_t ifindex = 0;
182
183 if (!sockunion2hostprefix(&from->su, &pp))
184 return;
185
186 /*
187 * Gather the ifindex for if up/down events to be
188 * tagged into this fun
189 */
190 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
191 ifindex = from->su.sin6.sin6_scope_id;
192
193 afi = family2afi(pp.family);
194 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
195
196 if (!sockunion2hostprefix(&to->su, &pt))
197 return;
198
199 /*
200 * Gather the ifindex for if up/down events to be
201 * tagged into this fun
202 */
203 ifindex = 0;
204 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
205 ifindex = to->su.sin6.sin6_scope_id;
206 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
207
208 if (bnct != bncp)
209 return;
210
211 if (bnct)
212 bnct->nht_info = to;
213 }
214
215 /*
216 * Returns the bnc whose bnc->nht_info matches the LL peer by
217 * looping through the IPv6 nexthop table
218 */
219 static struct bgp_nexthop_cache *
220 bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
221 {
222 struct bgp_nexthop_cache *bnc;
223
224 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
225 bnc) {
226 if (bnc->nht_info == peer) {
227 if (BGP_DEBUG(nht, NHT)) {
228 zlog_debug(
229 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
230 &bnc->prefix, bnc->ifindex,
231 bnc->srte_color, bnc, peer->host,
232 peer->bgp->name_pretty, peer);
233 }
234 return bnc;
235 }
236 }
237
238 if (BGP_DEBUG(nht, NHT))
239 zlog_debug(
240 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
241 peer->host, peer->bgp->name_pretty, peer);
242
243 return NULL;
244 }
245
246 void bgp_unlink_nexthop_by_peer(struct peer *peer)
247 {
248 struct prefix p;
249 struct bgp_nexthop_cache *bnc;
250 afi_t afi = family2afi(peer->su.sa.sa_family);
251 ifindex_t ifindex = 0;
252
253 if (!sockunion2hostprefix(&peer->su, &p)) {
254 /*
255 * In scenarios where unnumbered BGP session is brought
256 * down by shutting down the interface before unconfiguring
257 * the BGP neighbor, neighbor information in peer->su.sa
258 * will be cleared when the interface is shutdown. So
259 * during the deletion of unnumbered bgp peer, above check
260 * will return true. Therefore, in this case,BGP needs to
261 * find the bnc whose bnc->nht_info matches the
262 * peer being deleted and free it.
263 */
264 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
265 } else {
266 /*
267 * Gather the ifindex for if up/down events to be
268 * tagged into this fun
269 */
270 if (afi == AFI_IP6 &&
271 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
272 ifindex = peer->su.sin6.sin6_scope_id;
273 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
274 ifindex);
275 }
276
277 if (!bnc)
278 return;
279
280 /* cleanup the peer reference */
281 bnc->nht_info = NULL;
282
283 bgp_unlink_nexthop_check(bnc);
284 }
285
286 /*
287 * A route and its nexthop might belong to different VRFs. Therefore,
288 * we need both the bgp_route and bgp_nexthop pointers.
289 */
290 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
291 afi_t afi, safi_t safi, struct bgp_path_info *pi,
292 struct peer *peer, int connected,
293 const struct prefix *orig_prefix)
294 {
295 struct bgp_nexthop_cache_head *tree = NULL;
296 struct bgp_nexthop_cache *bnc;
297 struct bgp_path_info *bpi_ultimate;
298 struct prefix p;
299 uint32_t srte_color = 0;
300 int is_bgp_static_route = 0;
301 ifindex_t ifindex = 0;
302
303 if (pi) {
304 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
305 && (pi->sub_type == BGP_ROUTE_STATIC))
306 ? 1
307 : 0;
308
309 /* Since Extended Next-hop Encoding (RFC5549) support, we want
310 to derive
311 address-family from the next-hop. */
312 if (!is_bgp_static_route)
313 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
314 : AFI_IP;
315
316 /* Validation for the ipv4 mapped ipv6 nexthop. */
317 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
318 afi = AFI_IP;
319 }
320
321 /* This will return true if the global IPv6 NH is a link local
322 * addr */
323 if (make_prefix(afi, pi, &p) < 0)
324 return 1;
325
326 if (!is_bgp_static_route && orig_prefix
327 && prefix_same(&p, orig_prefix)) {
328 if (BGP_DEBUG(nht, NHT)) {
329 zlog_debug(
330 "%s(%pFX): prefix loops through itself",
331 __func__, &p);
332 }
333 return 0;
334 }
335
336 srte_color = pi->attr->srte_color;
337 } else if (peer) {
338 /*
339 * Gather the ifindex for if up/down events to be
340 * tagged into this fun
341 */
342 if (afi == AFI_IP6 &&
343 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
344 ifindex = peer->su.sin6.sin6_scope_id;
345 if (ifindex == 0) {
346 if (BGP_DEBUG(nht, NHT)) {
347 zlog_debug(
348 "%s: Unable to locate ifindex, waiting till we have one",
349 peer->conf_if);
350 }
351 return 0;
352 }
353 }
354
355 if (!sockunion2hostprefix(&peer->su, &p)) {
356 if (BGP_DEBUG(nht, NHT)) {
357 zlog_debug(
358 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
359 __func__, afi, AFI_IP, AFI_IP6);
360 }
361 return 0;
362 }
363 } else
364 return 0;
365
366 if (is_bgp_static_route)
367 tree = &bgp_nexthop->import_check_table[afi];
368 else
369 tree = &bgp_nexthop->nexthop_cache_table[afi];
370
371 bnc = bnc_find(tree, &p, srte_color, ifindex);
372 if (!bnc) {
373 bnc = bnc_new(tree, &p, srte_color, ifindex);
374 bnc->bgp = bgp_nexthop;
375 if (BGP_DEBUG(nht, NHT))
376 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
377 &bnc->prefix, bnc->ifindex, bnc->srte_color,
378 bnc->bgp->name_pretty, peer);
379 } else {
380 if (BGP_DEBUG(nht, NHT))
381 zlog_debug(
382 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
383 &bnc->prefix, bnc->ifindex,
384 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
385 bnc->path_count, bnc->nht_info);
386 }
387
388 if (pi && is_route_parent_evpn(pi))
389 bnc->is_evpn_gwip_nexthop = true;
390
391 if (is_bgp_static_route) {
392 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
393
394 /* If we're toggling the type, re-register */
395 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
396 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
397 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
398 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
399 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
400 } else if ((!CHECK_FLAG(bgp_route->flags,
401 BGP_FLAG_IMPORT_CHECK))
402 && CHECK_FLAG(bnc->flags,
403 BGP_STATIC_ROUTE_EXACT_MATCH)) {
404 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
405 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
406 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
407 }
408 }
409 /* When nexthop is already known, but now requires 'connected'
410 * resolution,
411 * re-register it. The reverse scenario where the nexthop currently
412 * requires
413 * 'connected' resolution does not need a re-register (i.e., we treat
414 * 'connected-required' as an override) except in the scenario where
415 * this
416 * is actually a case of tracking a peer for connectivity (e.g., after
417 * disable connected-check).
418 * NOTE: We don't track the number of paths separately for 'connected-
419 * required' vs 'connected-not-required' as this change is not a common
420 * scenario.
421 */
422 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
423 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
424 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
425 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
426 } else if (peer && !connected
427 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
428 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
429 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
430 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
431 }
432 if (peer && (bnc->ifindex != ifindex)) {
433 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
434 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
435 bnc->ifindex = ifindex;
436 }
437 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
438 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
439 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
440 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
441 && !is_default_host_route(&bnc->prefix))
442 register_zebra_rnh(bnc);
443
444 if (pi && pi->nexthop != bnc) {
445 /* Unlink from existing nexthop cache, if any. This will also
446 * free
447 * the nexthop cache entry, if appropriate.
448 */
449 bgp_unlink_nexthop(pi);
450
451 /* updates NHT pi list reference */
452 path_nh_map(pi, bnc, true);
453
454 bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
455 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
456 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
457 bnc->metric;
458 else if (bpi_ultimate->extra)
459 bpi_ultimate->extra->igpmetric = 0;
460 } else if (peer) {
461 /*
462 * Let's not accidentally save the peer data for a peer
463 * we are going to throw away in a second or so.
464 * When we come back around we'll fix up this
465 * data properly in replace_nexthop_by_peer
466 */
467 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
468 bnc->nht_info = (void *)peer; /* NHT peer reference */
469 }
470
471 /*
472 * We are cheating here. Views have no associated underlying
473 * ability to detect nexthops. So when we have a view
474 * just tell everyone the nexthop is valid
475 */
476 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
477 return 1;
478 else if (safi == SAFI_UNICAST && pi &&
479 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
480 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
481 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
482 else
483 return (bgp_isvalid_nexthop(bnc));
484 }
485
486 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
487 {
488 struct bgp_nexthop_cache *bnc;
489 struct prefix p;
490 ifindex_t ifindex = 0;
491
492 if (!peer)
493 return;
494
495 /*
496 * In case the below check evaluates true and if
497 * the bnc has not been freed at this point, then
498 * we might have to do something similar to what's
499 * done in bgp_unlink_nexthop_by_peer(). Since
500 * bgp_unlink_nexthop_by_peer() loops through the
501 * nodes of V6 nexthop cache to find the bnc, it is
502 * currently not being called here.
503 */
504 if (!sockunion2hostprefix(&peer->su, &p))
505 return;
506 /*
507 * Gather the ifindex for if up/down events to be
508 * tagged into this fun
509 */
510 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
511 ifindex = peer->su.sin6.sin6_scope_id;
512 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
513 &p, 0, ifindex);
514 if (!bnc) {
515 if (BGP_DEBUG(nht, NHT))
516 zlog_debug(
517 "Cannot find connected NHT node for peer %s(%s)",
518 peer->host, peer->bgp->name_pretty);
519 return;
520 }
521
522 if (bnc->nht_info != peer) {
523 if (BGP_DEBUG(nht, NHT))
524 zlog_debug(
525 "Connected NHT %p node for peer %s(%s) points to %p",
526 bnc, peer->host, bnc->bgp->name_pretty,
527 bnc->nht_info);
528 return;
529 }
530
531 bnc->nht_info = NULL;
532
533 if (LIST_EMPTY(&(bnc->paths))) {
534 if (BGP_DEBUG(nht, NHT))
535 zlog_debug(
536 "Freeing connected NHT node %p for peer %s(%s)",
537 bnc, peer->host, bnc->bgp->name_pretty);
538 unregister_zebra_rnh(bnc);
539 bnc_free(bnc);
540 }
541 }
542
543 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
544 struct zapi_route *nhr,
545 bool import_check)
546 {
547 struct nexthop *nexthop;
548 struct nexthop *oldnh;
549 struct nexthop *nhlist_head = NULL;
550 struct nexthop *nhlist_tail = NULL;
551 int i;
552 bool evpn_resolved = false;
553
554 bnc->last_update = monotime(NULL);
555 bnc->change_flags = 0;
556
557 /* debug print the input */
558 if (BGP_DEBUG(nht, NHT)) {
559 char bnc_buf[BNC_FLAG_DUMP_SIZE];
560
561 zlog_debug(
562 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
563 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
564 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
565 nhr->nexthop_num, bnc->nexthop_num,
566 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
567 sizeof(bnc_buf)));
568 }
569
570 if (nhr->metric != bnc->metric)
571 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
572
573 if (nhr->nexthop_num != bnc->nexthop_num)
574 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
575
576 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
577 !prefix_same(&bnc->prefix, &nhr->prefix))) {
578 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
579 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
580 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
581 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
582
583 bnc_nexthop_free(bnc);
584 bnc->nexthop = NULL;
585
586 if (BGP_DEBUG(nht, NHT))
587 zlog_debug(
588 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
589 __func__, &bnc->prefix, &nhr->prefix);
590 } else if (nhr->nexthop_num) {
591 struct peer *peer = bnc->nht_info;
592
593 /* notify bgp fsm if nbr ip goes from invalid->valid */
594 if (!bnc->nexthop_num)
595 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
596
597 if (!bnc->is_evpn_gwip_nexthop)
598 bnc->flags |= BGP_NEXTHOP_VALID;
599 bnc->metric = nhr->metric;
600 bnc->nexthop_num = nhr->nexthop_num;
601
602 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
603
604 for (i = 0; i < nhr->nexthop_num; i++) {
605 int num_labels = 0;
606
607 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
608
609 /*
610 * Turn on RA for the v6 nexthops
611 * we receive from bgp. This is to allow us
612 * to work with v4 routing over v6 nexthops
613 */
614 if (peer && !peer->ifp
615 && CHECK_FLAG(peer->flags,
616 PEER_FLAG_CAPABILITY_ENHE)
617 && nhr->prefix.family == AF_INET6
618 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
619 struct interface *ifp;
620
621 ifp = if_lookup_by_index(nexthop->ifindex,
622 nexthop->vrf_id);
623 if (ifp)
624 zclient_send_interface_radv_req(
625 zclient, nexthop->vrf_id, ifp,
626 true,
627 BGP_UNNUM_DEFAULT_RA_INTERVAL);
628 }
629 /* There is at least one label-switched path */
630 if (nexthop->nh_label &&
631 nexthop->nh_label->num_labels) {
632
633 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
634 num_labels = nexthop->nh_label->num_labels;
635 }
636
637 if (BGP_DEBUG(nht, NHT)) {
638 char buf[NEXTHOP_STRLEN];
639 zlog_debug(
640 " nhop via %s (%d labels)",
641 nexthop2str(nexthop, buf, sizeof(buf)),
642 num_labels);
643 }
644
645 if (nhlist_tail) {
646 nhlist_tail->next = nexthop;
647 nhlist_tail = nexthop;
648 } else {
649 nhlist_tail = nexthop;
650 nhlist_head = nexthop;
651 }
652
653 /* No need to evaluate the nexthop if we have already
654 * determined
655 * that there has been a change.
656 */
657 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
658 continue;
659
660 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
661 if (nexthop_same(oldnh, nexthop))
662 break;
663
664 if (!oldnh)
665 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
666 }
667 bnc_nexthop_free(bnc);
668 bnc->nexthop = nhlist_head;
669
670 /*
671 * Gateway IP nexthop is L3 reachable. Mark it as
672 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
673 * remote EVPN RT-2.
674 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
675 * When its mapping with EVPN RT-2 is established, unset
676 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
677 */
678 if (bnc->is_evpn_gwip_nexthop) {
679 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
680
681 if (BGP_DEBUG(nht, NHT))
682 zlog_debug(
683 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
684 &bnc->prefix,
685 (evpn_resolved ? "successful"
686 : "failed"));
687
688 if (evpn_resolved) {
689 bnc->flags |= BGP_NEXTHOP_VALID;
690 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
691 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
692 } else {
693 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
694 bnc->flags &= ~BGP_NEXTHOP_VALID;
695 }
696 }
697 } else {
698 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
699 bnc->flags &= ~BGP_NEXTHOP_VALID;
700 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
701 bnc->nexthop_num = nhr->nexthop_num;
702
703 /* notify bgp fsm if nbr ip goes from valid->invalid */
704 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
705
706 bnc_nexthop_free(bnc);
707 bnc->nexthop = NULL;
708 }
709
710 evaluate_paths(bnc);
711 }
712
713 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
714 struct bgp_nexthop_cache_head *table,
715 struct interface *ifp, bool up)
716 {
717 struct bgp_nexthop_cache *bnc;
718
719 frr_each (bgp_nexthop_cache, table, bnc) {
720 if (bnc->ifindex != ifp->ifindex)
721 continue;
722
723 bnc->last_update = monotime(NULL);
724 bnc->change_flags = 0;
725
726 /*
727 * For interface based routes ( ala the v6 LL routes
728 * that this was written for ) the metric received
729 * for the connected route is 0 not 1.
730 */
731 bnc->metric = 0;
732 if (up) {
733 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
734 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
735 bnc->nexthop_num = 1;
736 } else {
737 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
738 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
739 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
740 bnc->nexthop_num = 0;
741 }
742
743 evaluate_paths(bnc);
744 }
745 }
746 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
747 {
748 struct bgp *bgp;
749
750 bgp = ifp->vrf->info;
751 if (!bgp)
752 return;
753
754 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
755 up);
756 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
757 up);
758 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
759 up);
760 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
761 up);
762 }
763
/* Interface came up: refresh every nexthop cache entry keyed on it. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool is_up = true;

	bgp_nht_ifp_handle(ifp, is_up);
}
768
/* Interface went down: invalidate every nexthop cache entry keyed on it. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool is_up = false;

	bgp_nht_ifp_handle(ifp, is_up);
}
773
774 static void bgp_nht_ifp_initial(struct thread *thread)
775 {
776 ifindex_t ifindex = THREAD_VAL(thread);
777 struct bgp *bgp = THREAD_ARG(thread);
778 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
779
780 if (!ifp)
781 return;
782
783 if (BGP_DEBUG(nht, NHT))
784 zlog_debug(
785 "Handle NHT initial update for Intf %s(%d) status %s",
786 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
787
788 if (if_is_up(ifp))
789 bgp_nht_ifp_up(ifp);
790 else
791 bgp_nht_ifp_down(ifp);
792 }
793
794 /*
795 * So the bnc code has the ability to handle interface up/down
796 * events to properly handle v6 LL peering.
797 * What is happening here:
798 * The event system for peering expects the nht code to
799 * report on the tracking events after we move to active
800 * So let's give the system a chance to report on that event
801 * in a manner that is expected.
802 */
803 void bgp_nht_interface_events(struct peer *peer)
804 {
805 struct bgp *bgp = peer->bgp;
806 struct bgp_nexthop_cache_head *table;
807 struct bgp_nexthop_cache *bnc;
808 struct prefix p;
809 ifindex_t ifindex = 0;
810
811 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
812 return;
813
814 if (!sockunion2hostprefix(&peer->su, &p))
815 return;
816 /*
817 * Gather the ifindex for if up/down events to be
818 * tagged into this fun
819 */
820 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
821 ifindex = peer->su.sin6.sin6_scope_id;
822
823 table = &bgp->nexthop_cache_table[AFI_IP6];
824 bnc = bnc_find(table, &p, 0, ifindex);
825 if (!bnc)
826 return;
827
828 if (bnc->ifindex)
829 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
830 bnc->ifindex, NULL);
831 }
832
833 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
834 {
835 struct bgp_nexthop_cache_head *tree = NULL;
836 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
837 struct bgp *bgp;
838 struct prefix match;
839 struct zapi_route nhr;
840 afi_t afi;
841
842 bgp = bgp_lookup_by_vrf_id(vrf_id);
843 if (!bgp) {
844 flog_err(
845 EC_BGP_NH_UPD,
846 "parse nexthop update: instance not found for vrf_id %u",
847 vrf_id);
848 return;
849 }
850
851 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
852 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
853 bgp->name_pretty);
854 return;
855 }
856
857 afi = family2afi(match.family);
858 tree = &bgp->nexthop_cache_table[afi];
859
860 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
861 if (!bnc_nhc) {
862 if (BGP_DEBUG(nht, NHT))
863 zlog_debug(
864 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
865 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
866 } else
867 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
868
869 tree = &bgp->import_check_table[afi];
870
871 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
872 if (!bnc_import) {
873 if (BGP_DEBUG(nht, NHT))
874 zlog_debug(
875 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
876 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
877 } else
878 bgp_process_nexthop_update(bnc_import, &nhr, true);
879
880 /*
881 * HACK: if any BGP route is dependant on an SR-policy that doesn't
882 * exist, zebra will never send NH updates relative to that policy. In
883 * that case, whenever we receive an update about a colorless NH, update
884 * the corresponding colorful NHs that share the same endpoint but that
885 * are inactive. This ugly hack should work around the problem at the
886 * cost of a performance pernalty. Long term, what should be done is to
887 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
888 * which should provide a better infrastructure to solve this issue in
889 * a more efficient and elegant way.
890 */
891 if (nhr.srte_color == 0 && bnc_nhc) {
892 struct bgp_nexthop_cache *bnc_iter;
893
894 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
895 bnc_iter) {
896 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
897 bnc_iter->srte_color == 0 ||
898 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
899 continue;
900
901 bgp_process_nexthop_update(bnc_iter, &nhr, false);
902 }
903 }
904 }
905
906 /*
907 * Cleanup nexthop registration and status information for BGP nexthops
908 * pertaining to this VRF. This is invoked upon VRF deletion.
909 */
910 void bgp_cleanup_nexthops(struct bgp *bgp)
911 {
912 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
913 struct bgp_nexthop_cache *bnc;
914
915 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
916 bnc) {
917 /* Clear relevant flags. */
918 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
919 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
920 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
921 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
922 }
923 }
924 }
925
926 /**
927 * make_prefix - make a prefix structure from the path (essentially
928 * path's node.
929 */
930 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
931 {
932
933 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
934 && (pi->sub_type == BGP_ROUTE_STATIC))
935 ? 1
936 : 0;
937 struct bgp_dest *net = pi->net;
938 const struct prefix *p_orig = bgp_dest_get_prefix(net);
939 struct in_addr ipv4;
940
941 if (p_orig->family == AF_FLOWSPEC) {
942 if (!pi->peer)
943 return -1;
944 return bgp_flowspec_get_first_nh(pi->peer->bgp,
945 pi, p, afi);
946 }
947 memset(p, 0, sizeof(struct prefix));
948 switch (afi) {
949 case AFI_IP:
950 p->family = AF_INET;
951 if (is_bgp_static) {
952 p->u.prefix4 = p_orig->u.prefix4;
953 p->prefixlen = p_orig->prefixlen;
954 } else {
955 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
956 ipv4_mapped_ipv6_to_ipv4(
957 &pi->attr->mp_nexthop_global, &ipv4);
958 p->u.prefix4 = ipv4;
959 p->prefixlen = IPV4_MAX_BITLEN;
960 } else {
961 if (p_orig->family == AF_EVPN)
962 p->u.prefix4 =
963 pi->attr->mp_nexthop_global_in;
964 else
965 p->u.prefix4 = pi->attr->nexthop;
966 p->prefixlen = IPV4_MAX_BITLEN;
967 }
968 }
969 break;
970 case AFI_IP6:
971 p->family = AF_INET6;
972
973 if (is_bgp_static) {
974 p->u.prefix6 = p_orig->u.prefix6;
975 p->prefixlen = p_orig->prefixlen;
976 } else {
977 /* If we receive MP_REACH nexthop with ::(LL)
978 * or LL(LL), use LL address as nexthop cache.
979 */
980 if (pi->attr->mp_nexthop_len
981 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
982 && (IN6_IS_ADDR_UNSPECIFIED(
983 &pi->attr->mp_nexthop_global)
984 || IN6_IS_ADDR_LINKLOCAL(
985 &pi->attr->mp_nexthop_global)))
986 p->u.prefix6 = pi->attr->mp_nexthop_local;
987 /* If we receive MR_REACH with (GA)::(LL)
988 * then check for route-map to choose GA or LL
989 */
990 else if (pi->attr->mp_nexthop_len
991 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
992 if (pi->attr->mp_nexthop_prefer_global)
993 p->u.prefix6 =
994 pi->attr->mp_nexthop_global;
995 else
996 p->u.prefix6 =
997 pi->attr->mp_nexthop_local;
998 } else
999 p->u.prefix6 = pi->attr->mp_nexthop_global;
1000 p->prefixlen = IPV6_MAX_BITLEN;
1001 }
1002 break;
1003 default:
1004 if (BGP_DEBUG(nht, NHT)) {
1005 zlog_debug(
1006 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
1007 __func__, afi, AFI_IP, AFI_IP6);
1008 }
1009 break;
1010 }
1011 return 0;
1012 }
1013
1014 /**
1015 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1016 * command to Zebra.
1017 * ARGUMENTS:
1018 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1019 * int command -- command to send to zebra
1020 * RETURNS:
1021 * void.
1022 */
1023 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1024 {
1025 bool exact_match = false;
1026 bool resolve_via_default = false;
1027 int ret;
1028
1029 if (!zclient)
1030 return;
1031
1032 /* Don't try to register if Zebra doesn't know of this instance. */
1033 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1034 if (BGP_DEBUG(zebra, ZEBRA))
1035 zlog_debug(
1036 "%s: No zebra instance to talk to, not installing NHT entry",
1037 __func__);
1038 return;
1039 }
1040
1041 if (!bgp_zebra_num_connects()) {
1042 if (BGP_DEBUG(zebra, ZEBRA))
1043 zlog_debug(
1044 "%s: We have not connected yet, cannot send nexthops",
1045 __func__);
1046 }
1047 if (command == ZEBRA_NEXTHOP_REGISTER) {
1048 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1049 exact_match = true;
1050 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1051 resolve_via_default = true;
1052 }
1053
1054 if (BGP_DEBUG(zebra, ZEBRA))
1055 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1056 zserv_command_string(command), &bnc->prefix,
1057 bnc->bgp->name_pretty);
1058
1059 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1060 exact_match, resolve_via_default,
1061 bnc->bgp->vrf_id);
1062 if (ret == ZCLIENT_SEND_FAILURE) {
1063 flog_warn(EC_BGP_ZEBRA_SEND,
1064 "sendmsg_nexthop: zclient_send_message() failed");
1065 return;
1066 }
1067
1068 if (command == ZEBRA_NEXTHOP_REGISTER)
1069 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1070 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1071 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1072 return;
1073 }
1074
1075 /**
1076 * register_zebra_rnh - register a NH/route with Zebra for notification
1077 * when the route or the route to the nexthop changes.
1078 * ARGUMENTS:
1079 * struct bgp_nexthop_cache *bnc
1080 * RETURNS:
1081 * void.
1082 */
1083 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1084 {
1085 /* Check if we have already registered */
1086 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1087 return;
1088
1089 if (bnc->ifindex) {
1090 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1091 return;
1092 }
1093
1094 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1095 }
1096
1097 /**
1098 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1099 * ARGUMENTS:
1100 * struct bgp_nexthop_cache *bnc
1101 * RETURNS:
1102 * void.
1103 */
1104 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1105 {
1106 /* Check if we have already registered */
1107 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1108 return;
1109
1110 if (bnc->ifindex) {
1111 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1112 return;
1113 }
1114
1115 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1116 }
1117
1118 /**
1119 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1120 * ARGUMENTS:
1121 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1122 * RETURNS:
1123 * void.
1124 */
1125 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1126 {
1127 struct bgp_dest *dest;
1128 struct bgp_path_info *path;
1129 struct bgp_path_info *bpi_ultimate;
1130 int afi;
1131 struct peer *peer = (struct peer *)bnc->nht_info;
1132 struct bgp_table *table;
1133 safi_t safi;
1134 struct bgp *bgp_path;
1135 const struct prefix *p;
1136
1137 if (BGP_DEBUG(nht, NHT)) {
1138 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1139 char chg_buf[BNC_FLAG_DUMP_SIZE];
1140
1141 zlog_debug(
1142 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1143 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1144 bnc->bgp->name_pretty,
1145 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1146 sizeof(bnc_buf)),
1147 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1148 sizeof(bnc_buf)));
1149 }
1150
1151 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1152 if (!(path->type == ZEBRA_ROUTE_BGP
1153 && ((path->sub_type == BGP_ROUTE_NORMAL)
1154 || (path->sub_type == BGP_ROUTE_STATIC)
1155 || (path->sub_type == BGP_ROUTE_IMPORTED))))
1156 continue;
1157
1158 dest = path->net;
1159 assert(dest && bgp_dest_table(dest));
1160 p = bgp_dest_get_prefix(dest);
1161 afi = family2afi(p->family);
1162 table = bgp_dest_table(dest);
1163 safi = table->safi;
1164
1165 /*
1166 * handle routes from other VRFs (they can have a
1167 * nexthop in THIS VRF). bgp_path is the bgp instance
1168 * that owns the route referencing this nexthop.
1169 */
1170 bgp_path = table->bgp;
1171
1172 /*
1173 * Path becomes valid/invalid depending on whether the nexthop
1174 * reachable/unreachable.
1175 *
1176 * In case of unicast routes that were imported from vpn
1177 * and that have labels, they are valid only if there are
1178 * nexthops with labels
1179 *
1180 * If the nexthop is EVPN gateway-IP,
1181 * do not check for a valid label.
1182 */
1183
1184 bool bnc_is_valid_nexthop = false;
1185 bool path_valid = false;
1186
1187 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1188 && path->extra && path->extra->num_labels
1189 && (path->attr->evpn_overlay.type
1190 != OVERLAY_INDEX_GATEWAY_IP)) {
1191 bnc_is_valid_nexthop =
1192 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1193 : false;
1194 } else {
1195 if (bgp_update_martian_nexthop(
1196 bnc->bgp, afi, safi, path->type,
1197 path->sub_type, path->attr, dest)) {
1198 if (BGP_DEBUG(nht, NHT))
1199 zlog_debug(
1200 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1201 __func__, dest, bgp_path->name);
1202 } else
1203 bnc_is_valid_nexthop =
1204 bgp_isvalid_nexthop(bnc) ? true : false;
1205 }
1206
1207 if (BGP_DEBUG(nht, NHT)) {
1208 if (dest->pdest)
1209 zlog_debug(
1210 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1211 afi, safi, dest,
1212 (struct prefix_rd *)bgp_dest_get_prefix(
1213 dest->pdest),
1214 bgp_path->name_pretty, path->flags);
1215 else
1216 zlog_debug(
1217 "... eval path %d/%d %pBD %s flags 0x%x",
1218 afi, safi, dest, bgp_path->name_pretty,
1219 path->flags);
1220 }
1221
1222 /* Skip paths marked for removal or as history. */
1223 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1224 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1225 continue;
1226
1227 /* Copy the metric to the path. Will be used for bestpath
1228 * computation */
1229 bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
1230 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1231 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
1232 bnc->metric;
1233 else if (bpi_ultimate->extra)
1234 bpi_ultimate->extra->igpmetric = 0;
1235
1236 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1237 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1238 || path->attr->srte_color != 0)
1239 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1240
1241 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1242 if (path_valid != bnc_is_valid_nexthop) {
1243 if (path_valid) {
1244 /* No longer valid, clear flag; also for EVPN
1245 * routes, unimport from VRFs if needed.
1246 */
1247 bgp_aggregate_decrement(bgp_path, p, path, afi,
1248 safi);
1249 bgp_path_info_unset_flag(dest, path,
1250 BGP_PATH_VALID);
1251 if (safi == SAFI_EVPN &&
1252 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1253 bgp_evpn_unimport_route(bgp_path,
1254 afi, safi, bgp_dest_get_prefix(dest), path);
1255 } else {
1256 /* Path becomes valid, set flag; also for EVPN
1257 * routes, import from VRFs if needed.
1258 */
1259 bgp_path_info_set_flag(dest, path,
1260 BGP_PATH_VALID);
1261 bgp_aggregate_increment(bgp_path, p, path, afi,
1262 safi);
1263 if (safi == SAFI_EVPN &&
1264 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1265 bgp_evpn_import_route(bgp_path,
1266 afi, safi, bgp_dest_get_prefix(dest), path);
1267 }
1268 }
1269
1270 bgp_process(bgp_path, dest, afi, safi);
1271 }
1272
1273 if (peer) {
1274 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1275
1276 if (valid_nexthops) {
1277 /*
1278 * Peering cannot occur across a blackhole nexthop
1279 */
1280 if (bnc->nexthop_num == 1 && bnc->nexthop
1281 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1282 peer->last_reset = PEER_DOWN_WAITING_NHT;
1283 valid_nexthops = 0;
1284 } else
1285 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1286 } else
1287 peer->last_reset = PEER_DOWN_WAITING_NHT;
1288
1289 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1290 if (BGP_DEBUG(nht, NHT))
1291 zlog_debug(
1292 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1293 __func__, peer->host,
1294 peer->bgp->name_pretty,
1295 !!valid_nexthops);
1296 bgp_fsm_nht_update(peer, !!valid_nexthops);
1297 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1298 }
1299 }
1300
1301 RESET_FLAG(bnc->change_flags);
1302 }
1303
1304 /**
1305 * path_nh_map - make or break path-to-nexthop association.
1306 * ARGUMENTS:
1307 * path - pointer to the path structure
1308 * bnc - pointer to the nexthop structure
1309 * make - if set, make the association. if unset, just break the existing
1310 * association.
1311 */
1312 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1313 bool make)
1314 {
1315 if (path->nexthop) {
1316 LIST_REMOVE(path, nh_thread);
1317 path->nexthop->path_count--;
1318 path->nexthop = NULL;
1319 }
1320 if (make) {
1321 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1322 path->nexthop = bnc;
1323 path->nexthop->path_count++;
1324 }
1325 }
1326
1327 /*
1328 * This function is called to register nexthops to zebra
1329 * as that we may have tried to install the nexthops
1330 * before we actually have a zebra connection
1331 */
1332 void bgp_nht_register_nexthops(struct bgp *bgp)
1333 {
1334 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1335 struct bgp_nexthop_cache *bnc;
1336
1337 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1338 bnc) {
1339 register_zebra_rnh(bnc);
1340 }
1341 }
1342 }
1343
1344 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1345 {
1346 struct bgp *bgp;
1347 struct bgp_nexthop_cache *bnc;
1348 struct nexthop *nhop;
1349 struct interface *ifp;
1350 struct prefix p;
1351 ifindex_t ifindex = 0;
1352
1353 if (peer->ifp)
1354 return;
1355
1356 bgp = peer->bgp;
1357 if (!sockunion2hostprefix(&peer->su, &p)) {
1358 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1359 __func__, peer->host);
1360 return;
1361 }
1362
1363 if (p.family != AF_INET6)
1364 return;
1365 /*
1366 * Gather the ifindex for if up/down events to be
1367 * tagged into this fun
1368 */
1369 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1370 ifindex = peer->su.sin6.sin6_scope_id;
1371
1372 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1373 if (!bnc)
1374 return;
1375
1376 if (peer != bnc->nht_info)
1377 return;
1378
1379 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1380 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1381
1382 if (!ifp)
1383 continue;
1384
1385 zclient_send_interface_radv_req(zclient,
1386 nhop->vrf_id,
1387 ifp, true,
1388 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1389 }
1390 }
1391
1392 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1393 {
1394 struct bgp *bgp;
1395 struct bgp_nexthop_cache *bnc;
1396 struct nexthop *nhop;
1397 struct interface *ifp;
1398 struct prefix p;
1399 ifindex_t ifindex = 0;
1400
1401 if (peer->ifp)
1402 return;
1403
1404 bgp = peer->bgp;
1405
1406 if (!sockunion2hostprefix(&peer->su, &p)) {
1407 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1408 __func__, peer->host);
1409 return;
1410 }
1411
1412 if (p.family != AF_INET6)
1413 return;
1414 /*
1415 * Gather the ifindex for if up/down events to be
1416 * tagged into this fun
1417 */
1418 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1419 ifindex = peer->su.sin6.sin6_scope_id;
1420
1421 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1422 if (!bnc)
1423 return;
1424
1425 if (peer != bnc->nht_info)
1426 return;
1427
1428 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1429 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1430
1431 if (!ifp)
1432 continue;
1433
1434 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1435 0);
1436 }
1437 }
1438
1439 /****************************************************************************
1440 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1441 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1442 * left to the application using it.
1443 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1444 * failover of remote ES links.
1445 ***************************************************************************/
1446 static bitfield_t bgp_nh_id_bitmap;
1447 static uint32_t bgp_l3nhg_start;
1448
/*
 * Nexthop-group event hooks handed to nexthop_group_init() by
 * bgp_l3nhg_zebra_init().
 *
 * XXX - currently we do nothing on the callbacks; every hook below is
 * an intentional no-op.
 */

/* Hook for a nexthop group being added: no action. */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* Intentionally empty. */
}

/* Hook for a nexthop group being modified: no action. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	/* Intentionally empty. */
}

/* Hook for a nexthop being added to a group: no action. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* Intentionally empty. */
}

/* Hook for a nexthop being removed from a group: no action. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* Intentionally empty. */
}

/* Hook for a nexthop group being deleted: no action. */
static void bgp_l3nhg_del_cb(const char *name)
{
	/* Intentionally empty. */
}
1471
1472 static void bgp_l3nhg_zebra_init(void)
1473 {
1474 static bool bgp_l3nhg_zebra_inited;
1475 if (bgp_l3nhg_zebra_inited)
1476 return;
1477
1478 bgp_l3nhg_zebra_inited = true;
1479 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1480 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1481 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1482 bgp_l3nhg_del_cb);
1483 }
1484
1485
1486 void bgp_l3nhg_init(void)
1487 {
1488 uint32_t id_max;
1489
1490 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1491 bf_init(bgp_nh_id_bitmap, id_max);
1492 bf_assign_zero_index(bgp_nh_id_bitmap);
1493
1494 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1495 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1496 bgp_l3nhg_start + id_max);
1497 }
1498
1499 void bgp_l3nhg_finish(void)
1500 {
1501 bf_free(bgp_nh_id_bitmap);
1502 }
1503
1504 uint32_t bgp_l3nhg_id_alloc(void)
1505 {
1506 uint32_t nhg_id = 0;
1507
1508 bgp_l3nhg_zebra_init();
1509 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1510 if (nhg_id)
1511 nhg_id += bgp_l3nhg_start;
1512
1513 return nhg_id;
1514 }
1515
1516 void bgp_l3nhg_id_free(uint32_t nhg_id)
1517 {
1518 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1519 return;
1520
1521 nhg_id -= bgp_l3nhg_start;
1522
1523 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1524 }