]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
Merge pull request #12931 from SaiGomathiN/yang
[mirror_frr.git] / bgpd / bgp_nht.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* BGP Nexthop tracking
3 * Copyright (C) 2013 Cumulus Networks, Inc.
4 */
5
6 #include <zebra.h>
7
8 #include "command.h"
9 #include "frrevent.h"
10 #include "prefix.h"
11 #include "zclient.h"
12 #include "stream.h"
13 #include "network.h"
14 #include "log.h"
15 #include "memory.h"
16 #include "nexthop.h"
17 #include "vrf.h"
18 #include "filter.h"
19 #include "nexthop_group.h"
20
21 #include "bgpd/bgpd.h"
22 #include "bgpd/bgp_table.h"
23 #include "bgpd/bgp_route.h"
24 #include "bgpd/bgp_attr.h"
25 #include "bgpd/bgp_nexthop.h"
26 #include "bgpd/bgp_debug.h"
27 #include "bgpd/bgp_errors.h"
28 #include "bgpd/bgp_nht.h"
29 #include "bgpd/bgp_fsm.h"
30 #include "bgpd/bgp_zebra.h"
31 #include "bgpd/bgp_flowspec_util.h"
32 #include "bgpd/bgp_evpn.h"
33 #include "bgpd/bgp_rd.h"
34
/*
 * zclient instance used below when talking to zebra (nexthop
 * registration, RA requests, decoding nexthop updates). It is declared
 * extern here, so its definition lives outside this file.
 */
35 extern struct zclient *zclient;
36
/*
 * Forward declarations of file-local helpers that are called before
 * their definitions further down in this file.
 */
37 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
38 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
39 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
40 static void bgp_nht_ifp_initial(struct event *thread);
41
42 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
43 {
44 return (bgp_zebra_num_connects() == 0
45 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
46 && bnc->nexthop_num > 0));
47 }
48
49 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
50 struct bgp_path_info *path)
51 {
52 struct interface *ifp = NULL;
53 struct nexthop *nexthop;
54 struct bgp_interface *iifp;
55 struct peer *peer;
56
57 if (!path->extra || !path->extra->peer_orig)
58 return false;
59
60 peer = path->extra->peer_orig;
61
62 /* only connected ebgp peers are valid */
63 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
64 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
65 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
66 return false;
67
68 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
69 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
70 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
71 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
72 ifp = if_lookup_by_index(
73 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
74 bnc->bgp->vrf_id);
75 }
76 if (!ifp)
77 continue;
78 iifp = ifp->info;
79 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
80 return true;
81 }
82 return false;
83 }
84
85 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
86 struct bgp_path_info *path)
87 {
88 struct interface *ifp = NULL;
89 struct nexthop *nexthop;
90
91 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
92 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
93 ifp = if_lookup_by_index(
94 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
95 bnc->bgp->vrf_id);
96 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
97 ifp->ll_type == ZEBRA_LLT_IP6GRE))
98 break;
99 }
100 }
101 if (!ifp)
102 return false;
103
104 if (CHECK_FLAG(path->attr->rmap_change_flags,
105 BATTR_RMAP_L3VPN_ACCEPT_GRE))
106 return true;
107
108 return false;
109 }
110
111 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
112 struct bgp_path_info *path)
113 {
114 /*
115 * - In the case of MPLS-VPN, the label is learned from LDP or other
116 * protocols, and nexthop tracking is enabled for the label.
117 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
118 * - In the case of SRv6-VPN, we need to track the reachability to the
119 * SID (in other words, IPv6 address). As in MPLS, we need to record
120 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
121 * currently not implemented, and this function assumes that all
122 * Transit routes for SRv6-VPN are valid.
123 * - Otherwise check for mpls-gre acceptance
124 */
125 return (bgp_zebra_num_connects() == 0 ||
126 (bnc && (bnc->nexthop_num > 0 &&
127 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
128 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
129 bnc->bgp->srv6_enabled ||
130 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
131 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
132 }
133
134 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
135 {
136 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
137 if (BGP_DEBUG(nht, NHT))
138 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
139 &bnc->prefix, bnc->ifindex, bnc->srte_color,
140 bnc->bgp->name_pretty);
141 /* only unregister if this is the last nh for this prefix*/
142 if (!bnc_existing_for_prefix(bnc))
143 unregister_zebra_rnh(bnc);
144 bnc_free(bnc);
145 }
146 }
147
148 void bgp_unlink_nexthop(struct bgp_path_info *path)
149 {
150 struct bgp_nexthop_cache *bnc = path->nexthop;
151
152 if (!bnc)
153 return;
154
155 path_nh_map(path, NULL, false);
156
157 bgp_unlink_nexthop_check(bnc);
158 }
159
160 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
161 {
162 struct prefix pp;
163 struct prefix pt;
164 struct bgp_nexthop_cache *bncp, *bnct;
165 afi_t afi;
166 ifindex_t ifindex = 0;
167
168 if (!sockunion2hostprefix(&from->su, &pp))
169 return;
170
171 /*
172 * Gather the ifindex for if up/down events to be
173 * tagged into this fun
174 */
175 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
176 ifindex = from->su.sin6.sin6_scope_id;
177
178 afi = family2afi(pp.family);
179 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
180
181 if (!sockunion2hostprefix(&to->su, &pt))
182 return;
183
184 /*
185 * Gather the ifindex for if up/down events to be
186 * tagged into this fun
187 */
188 ifindex = 0;
189 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
190 ifindex = to->su.sin6.sin6_scope_id;
191 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
192
193 if (bnct != bncp)
194 return;
195
196 if (bnct)
197 bnct->nht_info = to;
198 }
199
200 /*
201 * Returns the bnc whose bnc->nht_info matches the LL peer by
202 * looping through the IPv6 nexthop table
203 */
204 static struct bgp_nexthop_cache *
205 bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
206 {
207 struct bgp_nexthop_cache *bnc;
208
209 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
210 bnc) {
211 if (bnc->nht_info == peer) {
212 if (BGP_DEBUG(nht, NHT)) {
213 zlog_debug(
214 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
215 &bnc->prefix, bnc->ifindex,
216 bnc->srte_color, bnc, peer->host,
217 peer->bgp->name_pretty, peer);
218 }
219 return bnc;
220 }
221 }
222
223 if (BGP_DEBUG(nht, NHT))
224 zlog_debug(
225 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
226 peer->host, peer->bgp->name_pretty, peer);
227
228 return NULL;
229 }
230
231 void bgp_unlink_nexthop_by_peer(struct peer *peer)
232 {
233 struct prefix p;
234 struct bgp_nexthop_cache *bnc;
235 afi_t afi = family2afi(peer->su.sa.sa_family);
236 ifindex_t ifindex = 0;
237
238 if (!sockunion2hostprefix(&peer->su, &p)) {
239 /*
240 * In scenarios where unnumbered BGP session is brought
241 * down by shutting down the interface before unconfiguring
242 * the BGP neighbor, neighbor information in peer->su.sa
243 * will be cleared when the interface is shutdown. So
244 * during the deletion of unnumbered bgp peer, above check
245 * will return true. Therefore, in this case,BGP needs to
246 * find the bnc whose bnc->nht_info matches the
247 * peer being deleted and free it.
248 */
249 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
250 } else {
251 /*
252 * Gather the ifindex for if up/down events to be
253 * tagged into this fun
254 */
255 if (afi == AFI_IP6 &&
256 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
257 ifindex = peer->su.sin6.sin6_scope_id;
258 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
259 ifindex);
260 }
261
262 if (!bnc)
263 return;
264
265 /* cleanup the peer reference */
266 bnc->nht_info = NULL;
267
268 bgp_unlink_nexthop_check(bnc);
269 }
270
271 /*
272 * A route and its nexthop might belong to different VRFs. Therefore,
273 * we need both the bgp_route and bgp_nexthop pointers.
 *
 * Find (or create) the nexthop cache entry for either a path (pi) or a
 * peer, register it with zebra when it is not registered yet, and link
 * the path or the peer to it.
 *
 * bgp_route   - instance owning the route; its flags (import-check) and
 *               inst_type (view handling) are consulted.
 * bgp_nexthop - instance whose cache tables hold the entry.
 * afi         - address family; recomputed from the nexthop attributes
 *               for non-static paths.
 * safi        - only used at the end to select the MPLS validity check.
 * pi          - path to link, or NULL when a peer is being tracked.
 * peer        - peer to track when pi is NULL; also consulted by the
 *               re-registration checks.
 * connected   - non-zero when 'connected' resolution is required.
 * orig_prefix - may be NULL; for non-static paths, 0 is returned when the
 *               prefix to track equals it ("loops through itself").
 *
 * Returns non-zero when the nexthop is considered valid (this includes the
 * case where make_prefix() fails and nothing is tracked), 0 otherwise.
274 */
275 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
276 afi_t afi, safi_t safi, struct bgp_path_info *pi,
277 struct peer *peer, int connected,
278 const struct prefix *orig_prefix)
279 {
280 struct bgp_nexthop_cache_head *tree = NULL;
281 struct bgp_nexthop_cache *bnc;
282 struct bgp_path_info *bpi_ultimate;
283 struct prefix p;
284 uint32_t srte_color = 0;
285 int is_bgp_static_route = 0;
286 ifindex_t ifindex = 0;
287
/* Path case: derive the prefix to track from the path itself. */
288 if (pi) {
289 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
290 && (pi->sub_type == BGP_ROUTE_STATIC))
291 ? 1
292 : 0;
293
294 /* Since Extended Next-hop Encoding (RFC5549) support, we want
295 to derive
296 address-family from the next-hop. */
297 if (!is_bgp_static_route)
298 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
299 : AFI_IP;
300
301 /* Validation for the ipv4 mapped ipv6 nexthop. */
302 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
303 afi = AFI_IP;
304 }
305
306 /* NOTE(review): make_prefix() fills p with the prefix to track; on a
307 * negative return the path is reported as valid (1) and left untracked. */
308 if (make_prefix(afi, pi, &p) < 0)
309 return 1;
310
311 if (!is_bgp_static_route && orig_prefix
312 && prefix_same(&p, orig_prefix)) {
313 if (BGP_DEBUG(nht, NHT)) {
314 zlog_debug(
315 "%s(%pFX): prefix loops through itself",
316 __func__, &p);
317 }
318 return 0;
319 }
320
321 srte_color = pi->attr->srte_color;
322 } else if (peer) {
323 /*
324 * Gather the ifindex for if up/down events to be
325 * tagged into this fun
326 */
327 if (afi == AFI_IP6 && peer->conf_if &&
328 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
329 ifindex = peer->su.sin6.sin6_scope_id;
330 if (ifindex == 0) {
331 if (BGP_DEBUG(nht, NHT)) {
332 zlog_debug(
333 "%s: Unable to locate ifindex, waiting till we have one",
334 peer->conf_if);
335 }
336 return 0;
337 }
338 }
339
340 if (!sockunion2hostprefix(&peer->su, &p)) {
341 if (BGP_DEBUG(nht, NHT)) {
342 zlog_debug(
343 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
344 __func__, afi, AFI_IP, AFI_IP6);
345 }
346 return 0;
347 }
348 } else
349 return 0;
350
/*
 * Static routes are tracked in the import-check table, everything else
 * in the nexthop cache table of the nexthop's instance.
 */
351 if (is_bgp_static_route)
352 tree = &bgp_nexthop->import_check_table[afi];
353 else
354 tree = &bgp_nexthop->nexthop_cache_table[afi];
355
356 bnc = bnc_find(tree, &p, srte_color, ifindex);
357 if (!bnc) {
358 bnc = bnc_new(tree, &p, srte_color, ifindex);
359 bnc->bgp = bgp_nexthop;
360 if (BGP_DEBUG(nht, NHT))
361 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
362 &bnc->prefix, bnc->ifindex, bnc->srte_color,
363 bnc->bgp->name_pretty, peer);
364 } else {
365 if (BGP_DEBUG(nht, NHT))
366 zlog_debug(
367 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
368 &bnc->prefix, bnc->ifindex,
369 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
370 bnc->path_count, bnc->nht_info);
371 }
372
373 if (pi && is_route_parent_evpn(pi))
374 bnc->is_evpn_gwip_nexthop = true;
375
/*
 * Each branch below that changes how the entry must be resolved clears
 * BGP_NEXTHOP_REGISTERED and BGP_NEXTHOP_VALID so the registration code
 * further down runs again.
 */
376 if (is_bgp_static_route) {
377 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
378
379 /* If we're toggling the type, re-register */
380 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
381 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
382 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
383 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
384 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
385 } else if ((!CHECK_FLAG(bgp_route->flags,
386 BGP_FLAG_IMPORT_CHECK))
387 && CHECK_FLAG(bnc->flags,
388 BGP_STATIC_ROUTE_EXACT_MATCH)) {
389 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
390 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
391 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
392 }
393 }
394 /* When nexthop is already known, but now requires 'connected'
395 * resolution,
396 * re-register it. The reverse scenario where the nexthop currently
397 * requires
398 * 'connected' resolution does not need a re-register (i.e., we treat
399 * 'connected-required' as an override) except in the scenario where
400 * this
401 * is actually a case of tracking a peer for connectivity (e.g., after
402 * disable connected-check).
403 * NOTE: We don't track the number of paths separately for 'connected-
404 * required' vs 'connected-not-required' as this change is not a common
405 * scenario.
406 */
407 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
408 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
409 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
410 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
411 } else if (peer && !connected
412 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
413 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
414 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
415 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
416 }
/* The interface the entry is bound to changed: force a re-registration. */
417 if (peer && (bnc->ifindex != ifindex)) {
418 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
419 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
420 bnc->ifindex = ifindex;
421 }
/*
 * Views are marked registered and valid without contacting zebra.
 * Otherwise register unless that already happened or the prefix is a
 * default host route.
 */
422 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
423 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
424 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
425 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
426 && !is_default_host_route(&bnc->prefix))
427 register_zebra_rnh(bnc);
428
429 if (pi && pi->nexthop != bnc) {
430 /* Unlink from existing nexthop cache, if any. This will also
431 * free
432 * the nexthop cache entry, if appropriate.
433 */
434 bgp_unlink_nexthop(pi);
435
436 /* updates NHT pi list reference */
437 path_nh_map(pi, bnc, true);
438
/*
 * Record the IGP metric on the path returned by
 * bgp_get_imported_bpi_ultimate(): bnc->metric when the entry is valid
 * and has a non-zero metric, otherwise 0 if extra data already exists.
 */
439 bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
440 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
441 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
442 bnc->metric;
443 else if (bpi_ultimate->extra)
444 bpi_ultimate->extra->igpmetric = 0;
445 } else if (peer) {
446 /*
447 * Let's not accidentally save the peer data for a peer
448 * we are going to throw away in a second or so.
449 * When we come back around we'll fix up this
450 * data properly in replace_nexthop_by_peer
451 */
452 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
453 bnc->nht_info = (void *)peer; /* NHT peer reference */
454 }
455
456 /*
457 * We are cheating here. Views have no associated underlying
458 * ability to detect nexthops. So when we have a view
459 * just tell everyone the nexthop is valid
460 */
461 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
462 return 1;
/*
 * Imported unicast paths that carry labels and are not EVPN gateway-IP
 * nexthops go through the MPLS-specific validity check.
 */
463 else if (safi == SAFI_UNICAST && pi &&
464 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
465 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
466 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
467 else
468 return (bgp_isvalid_nexthop(bnc));
469 }
470
471 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
472 {
473 struct bgp_nexthop_cache *bnc;
474 struct prefix p;
475 ifindex_t ifindex = 0;
476
477 if (!peer)
478 return;
479
480 /*
481 * In case the below check evaluates true and if
482 * the bnc has not been freed at this point, then
483 * we might have to do something similar to what's
484 * done in bgp_unlink_nexthop_by_peer(). Since
485 * bgp_unlink_nexthop_by_peer() loops through the
486 * nodes of V6 nexthop cache to find the bnc, it is
487 * currently not being called here.
488 */
489 if (!sockunion2hostprefix(&peer->su, &p))
490 return;
491 /*
492 * Gather the ifindex for if up/down events to be
493 * tagged into this fun
494 */
495 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
496 ifindex = peer->su.sin6.sin6_scope_id;
497 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
498 &p, 0, ifindex);
499 if (!bnc) {
500 if (BGP_DEBUG(nht, NHT))
501 zlog_debug(
502 "Cannot find connected NHT node for peer %s(%s)",
503 peer->host, peer->bgp->name_pretty);
504 return;
505 }
506
507 if (bnc->nht_info != peer) {
508 if (BGP_DEBUG(nht, NHT))
509 zlog_debug(
510 "Connected NHT %p node for peer %s(%s) points to %p",
511 bnc, peer->host, bnc->bgp->name_pretty,
512 bnc->nht_info);
513 return;
514 }
515
516 bnc->nht_info = NULL;
517
518 if (LIST_EMPTY(&(bnc->paths))) {
519 if (BGP_DEBUG(nht, NHT))
520 zlog_debug(
521 "Freeing connected NHT node %p for peer %s(%s)",
522 bnc, peer->host, bnc->bgp->name_pretty);
523 unregister_zebra_rnh(bnc);
524 bnc_free(bnc);
525 }
526 }
527
528 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
529 struct zapi_route *nhr,
530 bool import_check)
531 {
532 struct nexthop *nexthop;
533 struct nexthop *oldnh;
534 struct nexthop *nhlist_head = NULL;
535 struct nexthop *nhlist_tail = NULL;
536 int i;
537 bool evpn_resolved = false;
538
539 bnc->last_update = monotime(NULL);
540 bnc->change_flags = 0;
541
542 /* debug print the input */
543 if (BGP_DEBUG(nht, NHT)) {
544 char bnc_buf[BNC_FLAG_DUMP_SIZE];
545
546 zlog_debug(
547 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
548 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
549 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
550 nhr->nexthop_num, bnc->nexthop_num,
551 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
552 sizeof(bnc_buf)));
553 }
554
555 if (nhr->metric != bnc->metric)
556 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
557
558 if (nhr->nexthop_num != bnc->nexthop_num)
559 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
560
561 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
562 !prefix_same(&bnc->prefix, &nhr->prefix))) {
563 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
564 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
565 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
566 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
567
568 bnc_nexthop_free(bnc);
569 bnc->nexthop = NULL;
570
571 if (BGP_DEBUG(nht, NHT))
572 zlog_debug(
573 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
574 __func__, &bnc->prefix, &nhr->prefix);
575 } else if (nhr->nexthop_num) {
576 struct peer *peer = bnc->nht_info;
577
578 /* notify bgp fsm if nbr ip goes from invalid->valid */
579 if (!bnc->nexthop_num)
580 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
581
582 if (!bnc->is_evpn_gwip_nexthop)
583 bnc->flags |= BGP_NEXTHOP_VALID;
584 bnc->metric = nhr->metric;
585 bnc->nexthop_num = nhr->nexthop_num;
586
587 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
588
589 for (i = 0; i < nhr->nexthop_num; i++) {
590 int num_labels = 0;
591
592 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
593
594 /*
595 * Turn on RA for the v6 nexthops
596 * we receive from bgp. This is to allow us
597 * to work with v4 routing over v6 nexthops
598 */
599 if (peer && !peer->ifp
600 && CHECK_FLAG(peer->flags,
601 PEER_FLAG_CAPABILITY_ENHE)
602 && nhr->prefix.family == AF_INET6
603 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
604 struct interface *ifp;
605
606 ifp = if_lookup_by_index(nexthop->ifindex,
607 nexthop->vrf_id);
608 if (ifp)
609 zclient_send_interface_radv_req(
610 zclient, nexthop->vrf_id, ifp,
611 true,
612 BGP_UNNUM_DEFAULT_RA_INTERVAL);
613 }
614 /* There is at least one label-switched path */
615 if (nexthop->nh_label &&
616 nexthop->nh_label->num_labels) {
617
618 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
619 num_labels = nexthop->nh_label->num_labels;
620 }
621
622 if (BGP_DEBUG(nht, NHT)) {
623 char buf[NEXTHOP_STRLEN];
624 zlog_debug(
625 " nhop via %s (%d labels)",
626 nexthop2str(nexthop, buf, sizeof(buf)),
627 num_labels);
628 }
629
630 if (nhlist_tail) {
631 nhlist_tail->next = nexthop;
632 nhlist_tail = nexthop;
633 } else {
634 nhlist_tail = nexthop;
635 nhlist_head = nexthop;
636 }
637
638 /* No need to evaluate the nexthop if we have already
639 * determined
640 * that there has been a change.
641 */
642 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
643 continue;
644
645 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
646 if (nexthop_same(oldnh, nexthop))
647 break;
648
649 if (!oldnh)
650 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
651 }
652 bnc_nexthop_free(bnc);
653 bnc->nexthop = nhlist_head;
654
655 /*
656 * Gateway IP nexthop is L3 reachable. Mark it as
657 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
658 * remote EVPN RT-2.
659 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
660 * When its mapping with EVPN RT-2 is established, unset
661 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
662 */
663 if (bnc->is_evpn_gwip_nexthop) {
664 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
665
666 if (BGP_DEBUG(nht, NHT))
667 zlog_debug(
668 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
669 &bnc->prefix,
670 (evpn_resolved ? "successful"
671 : "failed"));
672
673 if (evpn_resolved) {
674 bnc->flags |= BGP_NEXTHOP_VALID;
675 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
676 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
677 } else {
678 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
679 bnc->flags &= ~BGP_NEXTHOP_VALID;
680 }
681 }
682 } else {
683 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
684 bnc->flags &= ~BGP_NEXTHOP_VALID;
685 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
686 bnc->nexthop_num = nhr->nexthop_num;
687
688 /* notify bgp fsm if nbr ip goes from valid->invalid */
689 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
690
691 bnc_nexthop_free(bnc);
692 bnc->nexthop = NULL;
693 }
694
695 evaluate_paths(bnc);
696 }
697
698 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
699 struct bgp_nexthop_cache_head *table,
700 struct interface *ifp, bool up)
701 {
702 struct bgp_nexthop_cache *bnc;
703
704 frr_each (bgp_nexthop_cache, table, bnc) {
705 if (bnc->ifindex != ifp->ifindex)
706 continue;
707
708 bnc->last_update = monotime(NULL);
709 bnc->change_flags = 0;
710
711 /*
712 * For interface based routes ( ala the v6 LL routes
713 * that this was written for ) the metric received
714 * for the connected route is 0 not 1.
715 */
716 bnc->metric = 0;
717 if (up) {
718 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
719 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
720 bnc->nexthop_num = 1;
721 } else {
722 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
723 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
724 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
725 bnc->nexthop_num = 0;
726 }
727
728 evaluate_paths(bnc);
729 }
730 }
731 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
732 {
733 struct bgp *bgp;
734
735 bgp = ifp->vrf->info;
736 if (!bgp)
737 return;
738
739 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
740 up);
741 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
742 up);
743 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
744 up);
745 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
746 up);
747 }
748
/* Interface came up: update every nexthop cache entry bound to it. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool up = true;

	bgp_nht_ifp_handle(ifp, up);
}
753
/* Interface went down: update every nexthop cache entry bound to it. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool up = false;

	bgp_nht_ifp_handle(ifp, up);
}
758
759 static void bgp_nht_ifp_initial(struct event *thread)
760 {
761 ifindex_t ifindex = EVENT_VAL(thread);
762 struct bgp *bgp = EVENT_ARG(thread);
763 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
764
765 if (!ifp)
766 return;
767
768 if (BGP_DEBUG(nht, NHT))
769 zlog_debug(
770 "Handle NHT initial update for Intf %s(%d) status %s",
771 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
772
773 if (if_is_up(ifp))
774 bgp_nht_ifp_up(ifp);
775 else
776 bgp_nht_ifp_down(ifp);
777 }
778
779 /*
780 * So the bnc code has the ability to handle interface up/down
781 * events to properly handle v6 LL peering.
782 * What is happening here:
783 * The event system for peering expects the nht code to
784 * report on the tracking events after we move to active
785 * So let's give the system a chance to report on that event
786 * in a manner that is expected.
787 */
788 void bgp_nht_interface_events(struct peer *peer)
789 {
790 struct bgp *bgp = peer->bgp;
791 struct bgp_nexthop_cache_head *table;
792 struct bgp_nexthop_cache *bnc;
793 struct prefix p;
794 ifindex_t ifindex = 0;
795
796 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
797 return;
798
799 if (!sockunion2hostprefix(&peer->su, &p))
800 return;
801 /*
802 * Gather the ifindex for if up/down events to be
803 * tagged into this fun
804 */
805 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
806 ifindex = peer->su.sin6.sin6_scope_id;
807
808 table = &bgp->nexthop_cache_table[AFI_IP6];
809 bnc = bnc_find(table, &p, 0, ifindex);
810 if (!bnc)
811 return;
812
813 if (bnc->ifindex)
814 event_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
815 bnc->ifindex, NULL);
816 }
817
818 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
819 {
820 struct bgp_nexthop_cache_head *tree = NULL;
821 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
822 struct bgp *bgp;
823 struct prefix match;
824 struct zapi_route nhr;
825 afi_t afi;
826
827 bgp = bgp_lookup_by_vrf_id(vrf_id);
828 if (!bgp) {
829 flog_err(
830 EC_BGP_NH_UPD,
831 "parse nexthop update: instance not found for vrf_id %u",
832 vrf_id);
833 return;
834 }
835
836 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
837 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
838 bgp->name_pretty);
839 return;
840 }
841
842 afi = family2afi(match.family);
843 tree = &bgp->nexthop_cache_table[afi];
844
845 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
846 if (!bnc_nhc) {
847 if (BGP_DEBUG(nht, NHT))
848 zlog_debug(
849 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
850 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
851 } else
852 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
853
854 tree = &bgp->import_check_table[afi];
855
856 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
857 if (!bnc_import) {
858 if (BGP_DEBUG(nht, NHT))
859 zlog_debug(
860 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
861 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
862 } else
863 bgp_process_nexthop_update(bnc_import, &nhr, true);
864
865 /*
866 * HACK: if any BGP route is dependant on an SR-policy that doesn't
867 * exist, zebra will never send NH updates relative to that policy. In
868 * that case, whenever we receive an update about a colorless NH, update
869 * the corresponding colorful NHs that share the same endpoint but that
870 * are inactive. This ugly hack should work around the problem at the
871 * cost of a performance pernalty. Long term, what should be done is to
872 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
873 * which should provide a better infrastructure to solve this issue in
874 * a more efficient and elegant way.
875 */
876 if (nhr.srte_color == 0 && bnc_nhc) {
877 struct bgp_nexthop_cache *bnc_iter;
878
879 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
880 bnc_iter) {
881 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
882 bnc_iter->srte_color == 0 ||
883 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
884 continue;
885
886 bgp_process_nexthop_update(bnc_iter, &nhr, false);
887 }
888 }
889 }
890
891 /*
892 * Cleanup nexthop registration and status information for BGP nexthops
893 * pertaining to this VRF. This is invoked upon VRF deletion.
894 */
895 void bgp_cleanup_nexthops(struct bgp *bgp)
896 {
897 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
898 struct bgp_nexthop_cache *bnc;
899
900 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
901 bnc) {
902 /* Clear relevant flags. */
903 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
904 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
905 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
906 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
907 }
908 }
909 }
910
911 /**
912 * make_prefix - make a prefix structure from the path (essentially
913 * path's node.
914 */
915 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
916 {
917
918 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
919 && (pi->sub_type == BGP_ROUTE_STATIC))
920 ? 1
921 : 0;
922 struct bgp_dest *net = pi->net;
923 const struct prefix *p_orig = bgp_dest_get_prefix(net);
924 struct in_addr ipv4;
925
926 if (p_orig->family == AF_FLOWSPEC) {
927 if (!pi->peer)
928 return -1;
929 return bgp_flowspec_get_first_nh(pi->peer->bgp,
930 pi, p, afi);
931 }
932 memset(p, 0, sizeof(struct prefix));
933 switch (afi) {
934 case AFI_IP:
935 p->family = AF_INET;
936 if (is_bgp_static) {
937 p->u.prefix4 = p_orig->u.prefix4;
938 p->prefixlen = p_orig->prefixlen;
939 } else {
940 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
941 ipv4_mapped_ipv6_to_ipv4(
942 &pi->attr->mp_nexthop_global, &ipv4);
943 p->u.prefix4 = ipv4;
944 p->prefixlen = IPV4_MAX_BITLEN;
945 } else {
946 if (p_orig->family == AF_EVPN)
947 p->u.prefix4 =
948 pi->attr->mp_nexthop_global_in;
949 else
950 p->u.prefix4 = pi->attr->nexthop;
951 p->prefixlen = IPV4_MAX_BITLEN;
952 }
953 }
954 break;
955 case AFI_IP6:
956 p->family = AF_INET6;
957
958 if (is_bgp_static) {
959 p->u.prefix6 = p_orig->u.prefix6;
960 p->prefixlen = p_orig->prefixlen;
961 } else {
962 /* If we receive MP_REACH nexthop with ::(LL)
963 * or LL(LL), use LL address as nexthop cache.
964 */
965 if (pi->attr->mp_nexthop_len
966 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
967 && (IN6_IS_ADDR_UNSPECIFIED(
968 &pi->attr->mp_nexthop_global)
969 || IN6_IS_ADDR_LINKLOCAL(
970 &pi->attr->mp_nexthop_global)))
971 p->u.prefix6 = pi->attr->mp_nexthop_local;
972 /* If we receive MR_REACH with (GA)::(LL)
973 * then check for route-map to choose GA or LL
974 */
975 else if (pi->attr->mp_nexthop_len
976 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
977 if (pi->attr->mp_nexthop_prefer_global)
978 p->u.prefix6 =
979 pi->attr->mp_nexthop_global;
980 else
981 p->u.prefix6 =
982 pi->attr->mp_nexthop_local;
983 } else
984 p->u.prefix6 = pi->attr->mp_nexthop_global;
985 p->prefixlen = IPV6_MAX_BITLEN;
986 }
987 break;
988 default:
989 if (BGP_DEBUG(nht, NHT)) {
990 zlog_debug(
991 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
992 __func__, afi, AFI_IP, AFI_IP6);
993 }
994 break;
995 }
996 return 0;
997 }
998
999 /**
1000 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1001 * command to Zebra.
1002 * ARGUMENTS:
1003 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1004 * int command -- command to send to zebra
1005 * RETURNS:
1006 * void.
1007 */
1008 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1009 {
1010 bool exact_match = false;
1011 bool resolve_via_default = false;
1012 int ret;
1013
1014 if (!zclient)
1015 return;
1016
1017 /* Don't try to register if Zebra doesn't know of this instance. */
1018 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1019 if (BGP_DEBUG(zebra, ZEBRA))
1020 zlog_debug(
1021 "%s: No zebra instance to talk to, not installing NHT entry",
1022 __func__);
1023 return;
1024 }
1025
1026 if (!bgp_zebra_num_connects()) {
1027 if (BGP_DEBUG(zebra, ZEBRA))
1028 zlog_debug(
1029 "%s: We have not connected yet, cannot send nexthops",
1030 __func__);
1031 }
1032 if (command == ZEBRA_NEXTHOP_REGISTER) {
1033 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1034 exact_match = true;
1035 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1036 resolve_via_default = true;
1037 }
1038
1039 if (BGP_DEBUG(zebra, ZEBRA))
1040 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1041 zserv_command_string(command), &bnc->prefix,
1042 bnc->bgp->name_pretty);
1043
1044 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1045 exact_match, resolve_via_default,
1046 bnc->bgp->vrf_id);
1047 if (ret == ZCLIENT_SEND_FAILURE) {
1048 flog_warn(EC_BGP_ZEBRA_SEND,
1049 "sendmsg_nexthop: zclient_send_message() failed");
1050 return;
1051 }
1052
1053 if (command == ZEBRA_NEXTHOP_REGISTER)
1054 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1055 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1056 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1057 return;
1058 }
1059
1060 /**
1061 * register_zebra_rnh - register a NH/route with Zebra for notification
1062 * when the route or the route to the nexthop changes.
1063 * ARGUMENTS:
1064 * struct bgp_nexthop_cache *bnc
1065 * RETURNS:
1066 * void.
1067 */
1068 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1069 {
1070 /* Check if we have already registered */
1071 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1072 return;
1073
1074 if (bnc->ifindex) {
1075 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1076 return;
1077 }
1078
1079 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1080 }
1081
1082 /**
1083 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1084 * ARGUMENTS:
1085 * struct bgp_nexthop_cache *bnc
1086 * RETURNS:
1087 * void.
1088 */
1089 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1090 {
1091 /* Check if we have already registered */
1092 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1093 return;
1094
1095 if (bnc->ifindex) {
1096 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1097 return;
1098 }
1099
1100 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1101 }
1102
1103 /**
1104 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1105 * ARGUMENTS:
1106 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1107 * RETURNS:
1108 * void.
1109 */
1110 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1111 {
1112 struct bgp_dest *dest;
1113 struct bgp_path_info *path;
1114 struct bgp_path_info *bpi_ultimate;
1115 int afi;
1116 struct peer *peer = (struct peer *)bnc->nht_info;
1117 struct bgp_table *table;
1118 safi_t safi;
1119 struct bgp *bgp_path;
1120 const struct prefix *p;
1121
1122 if (BGP_DEBUG(nht, NHT)) {
1123 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1124 char chg_buf[BNC_FLAG_DUMP_SIZE];
1125
1126 zlog_debug(
1127 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1128 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1129 bnc->bgp->name_pretty,
1130 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1131 sizeof(bnc_buf)),
1132 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1133 sizeof(bnc_buf)));
1134 }
1135
1136 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1137 if (!(path->type == ZEBRA_ROUTE_BGP
1138 && ((path->sub_type == BGP_ROUTE_NORMAL)
1139 || (path->sub_type == BGP_ROUTE_STATIC)
1140 || (path->sub_type == BGP_ROUTE_IMPORTED))))
1141 continue;
1142
1143 dest = path->net;
1144 assert(dest && bgp_dest_table(dest));
1145 p = bgp_dest_get_prefix(dest);
1146 afi = family2afi(p->family);
1147 table = bgp_dest_table(dest);
1148 safi = table->safi;
1149
1150 /*
1151 * handle routes from other VRFs (they can have a
1152 * nexthop in THIS VRF). bgp_path is the bgp instance
1153 * that owns the route referencing this nexthop.
1154 */
1155 bgp_path = table->bgp;
1156
1157 /*
1158 * Path becomes valid/invalid depending on whether the nexthop
1159 * reachable/unreachable.
1160 *
1161 * In case of unicast routes that were imported from vpn
1162 * and that have labels, they are valid only if there are
1163 * nexthops with labels
1164 *
1165 * If the nexthop is EVPN gateway-IP,
1166 * do not check for a valid label.
1167 */
1168
1169 bool bnc_is_valid_nexthop = false;
1170 bool path_valid = false;
1171
1172 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1173 && path->extra && path->extra->num_labels
1174 && (path->attr->evpn_overlay.type
1175 != OVERLAY_INDEX_GATEWAY_IP)) {
1176 bnc_is_valid_nexthop =
1177 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1178 : false;
1179 } else {
1180 if (bgp_update_martian_nexthop(
1181 bnc->bgp, afi, safi, path->type,
1182 path->sub_type, path->attr, dest)) {
1183 if (BGP_DEBUG(nht, NHT))
1184 zlog_debug(
1185 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1186 __func__, dest, bgp_path->name);
1187 } else
1188 bnc_is_valid_nexthop =
1189 bgp_isvalid_nexthop(bnc) ? true : false;
1190 }
1191
1192 if (BGP_DEBUG(nht, NHT)) {
1193
1194 if (dest->pdest) {
1195 char rd_buf[RD_ADDRSTRLEN];
1196
1197 prefix_rd2str(
1198 (struct prefix_rd *)bgp_dest_get_prefix(
1199 dest->pdest),
1200 rd_buf, sizeof(rd_buf),
1201 bgp_get_asnotation(bnc->bgp));
1202 zlog_debug(
1203 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
1204 afi, safi, dest, rd_buf,
1205 bgp_path->name_pretty, path->flags);
1206 } else
1207 zlog_debug(
1208 "... eval path %d/%d %pBD %s flags 0x%x",
1209 afi, safi, dest, bgp_path->name_pretty,
1210 path->flags);
1211 }
1212
1213 /* Skip paths marked for removal or as history. */
1214 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1215 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1216 continue;
1217
1218 /* Copy the metric to the path. Will be used for bestpath
1219 * computation */
1220 bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
1221 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1222 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
1223 bnc->metric;
1224 else if (bpi_ultimate->extra)
1225 bpi_ultimate->extra->igpmetric = 0;
1226
1227 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1228 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1229 || path->attr->srte_color != 0)
1230 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1231
1232 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1233 if (path_valid != bnc_is_valid_nexthop) {
1234 if (path_valid) {
1235 /* No longer valid, clear flag; also for EVPN
1236 * routes, unimport from VRFs if needed.
1237 */
1238 bgp_aggregate_decrement(bgp_path, p, path, afi,
1239 safi);
1240 bgp_path_info_unset_flag(dest, path,
1241 BGP_PATH_VALID);
1242 if (safi == SAFI_EVPN &&
1243 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1244 bgp_evpn_unimport_route(bgp_path,
1245 afi, safi, bgp_dest_get_prefix(dest), path);
1246 } else {
1247 /* Path becomes valid, set flag; also for EVPN
1248 * routes, import from VRFs if needed.
1249 */
1250 bgp_path_info_set_flag(dest, path,
1251 BGP_PATH_VALID);
1252 bgp_aggregate_increment(bgp_path, p, path, afi,
1253 safi);
1254 if (safi == SAFI_EVPN &&
1255 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1256 bgp_evpn_import_route(bgp_path,
1257 afi, safi, bgp_dest_get_prefix(dest), path);
1258 }
1259 }
1260
1261 bgp_process(bgp_path, dest, afi, safi);
1262 }
1263
1264 if (peer) {
1265 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1266
1267 if (valid_nexthops) {
1268 /*
1269 * Peering cannot occur across a blackhole nexthop
1270 */
1271 if (bnc->nexthop_num == 1 && bnc->nexthop
1272 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1273 peer->last_reset = PEER_DOWN_WAITING_NHT;
1274 valid_nexthops = 0;
1275 } else
1276 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1277 } else
1278 peer->last_reset = PEER_DOWN_WAITING_NHT;
1279
1280 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1281 if (BGP_DEBUG(nht, NHT))
1282 zlog_debug(
1283 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1284 __func__, peer->host,
1285 peer->bgp->name_pretty,
1286 !!valid_nexthops);
1287 bgp_fsm_nht_update(peer, !!valid_nexthops);
1288 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1289 }
1290 }
1291
1292 RESET_FLAG(bnc->change_flags);
1293 }
1294
1295 /**
1296 * path_nh_map - make or break path-to-nexthop association.
1297 * ARGUMENTS:
1298 * path - pointer to the path structure
1299 * bnc - pointer to the nexthop structure
1300 * make - if set, make the association. if unset, just break the existing
1301 * association.
1302 */
1303 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1304 bool make)
1305 {
1306 if (path->nexthop) {
1307 LIST_REMOVE(path, nh_thread);
1308 path->nexthop->path_count--;
1309 path->nexthop = NULL;
1310 }
1311 if (make) {
1312 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1313 path->nexthop = bnc;
1314 path->nexthop->path_count++;
1315 }
1316 }
1317
1318 /*
1319 * This function is called to register nexthops to zebra
1320 * as that we may have tried to install the nexthops
1321 * before we actually have a zebra connection
1322 */
1323 void bgp_nht_register_nexthops(struct bgp *bgp)
1324 {
1325 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1326 struct bgp_nexthop_cache *bnc;
1327
1328 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1329 bnc) {
1330 register_zebra_rnh(bnc);
1331 }
1332 }
1333 }
1334
1335 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1336 {
1337 struct bgp *bgp;
1338 struct bgp_nexthop_cache *bnc;
1339 struct nexthop *nhop;
1340 struct interface *ifp;
1341 struct prefix p;
1342 ifindex_t ifindex = 0;
1343
1344 if (peer->ifp)
1345 return;
1346
1347 bgp = peer->bgp;
1348 if (!sockunion2hostprefix(&peer->su, &p)) {
1349 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1350 __func__, peer->host);
1351 return;
1352 }
1353
1354 if (p.family != AF_INET6)
1355 return;
1356 /*
1357 * Gather the ifindex for if up/down events to be
1358 * tagged into this fun
1359 */
1360 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1361 ifindex = peer->su.sin6.sin6_scope_id;
1362
1363 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1364 if (!bnc)
1365 return;
1366
1367 if (peer != bnc->nht_info)
1368 return;
1369
1370 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1371 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1372
1373 if (!ifp)
1374 continue;
1375
1376 zclient_send_interface_radv_req(zclient,
1377 nhop->vrf_id,
1378 ifp, true,
1379 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1380 }
1381 }
1382
1383 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1384 {
1385 struct bgp *bgp;
1386 struct bgp_nexthop_cache *bnc;
1387 struct nexthop *nhop;
1388 struct interface *ifp;
1389 struct prefix p;
1390 ifindex_t ifindex = 0;
1391
1392 if (peer->ifp)
1393 return;
1394
1395 bgp = peer->bgp;
1396
1397 if (!sockunion2hostprefix(&peer->su, &p)) {
1398 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1399 __func__, peer->host);
1400 return;
1401 }
1402
1403 if (p.family != AF_INET6)
1404 return;
1405 /*
1406 * Gather the ifindex for if up/down events to be
1407 * tagged into this fun
1408 */
1409 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1410 ifindex = peer->su.sin6.sin6_scope_id;
1411
1412 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1413 if (!bnc)
1414 return;
1415
1416 if (peer != bnc->nht_info)
1417 return;
1418
1419 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1420 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1421
1422 if (!ifp)
1423 continue;
1424
1425 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1426 0);
1427 }
1428 }
1429
1430 /****************************************************************************
1431 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1432 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1433 * left to the application using it.
1434 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1435 * failover of remote ES links.
1436 ***************************************************************************/
1437 static bitfield_t bgp_nh_id_bitmap;
1438 static uint32_t bgp_l3nhg_start;
1439
/*
 * XXX - the nexthop-group callbacks below are registered through
 * nexthop_group_init() but currently do nothing.
 */

/* Nexthop-group added: no action. */
static void bgp_l3nhg_add_cb(const char *name)
{
	(void)name;
}

/* Nexthop-group modified: no action. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	(void)nhgc;
}

/* Nexthop added to a nexthop-group: no action. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	(void)nhgc;
	(void)nhop;
}

/* Nexthop removed from a nexthop-group: no action. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	(void)nhgc;
	(void)nhop;
}

/* Nexthop-group deleted: no action. */
static void bgp_l3nhg_del_cb(const char *name)
{
	(void)name;
}
1462
1463 static void bgp_l3nhg_zebra_init(void)
1464 {
1465 static bool bgp_l3nhg_zebra_inited;
1466 if (bgp_l3nhg_zebra_inited)
1467 return;
1468
1469 bgp_l3nhg_zebra_inited = true;
1470 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1471 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1472 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1473 bgp_l3nhg_del_cb);
1474 }
1475
1476
1477 void bgp_l3nhg_init(void)
1478 {
1479 uint32_t id_max;
1480
1481 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1482 bf_init(bgp_nh_id_bitmap, id_max);
1483 bf_assign_zero_index(bgp_nh_id_bitmap);
1484
1485 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1486 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1487 bgp_l3nhg_start + id_max);
1488 }
1489
1490 void bgp_l3nhg_finish(void)
1491 {
1492 bf_free(bgp_nh_id_bitmap);
1493 }
1494
1495 uint32_t bgp_l3nhg_id_alloc(void)
1496 {
1497 uint32_t nhg_id = 0;
1498
1499 bgp_l3nhg_zebra_init();
1500 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1501 if (nhg_id)
1502 nhg_id += bgp_l3nhg_start;
1503
1504 return nhg_id;
1505 }
1506
1507 void bgp_l3nhg_id_free(uint32_t nhg_id)
1508 {
1509 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1510 return;
1511
1512 nhg_id -= bgp_l3nhg_start;
1513
1514 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1515 }